rishabh5752 commited on
Commit
c2cb9ad
1 Parent(s): 17829f2

Upload 7 files

Browse files
faiss_store_openai.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50fc100d2bd725fcc1b8855388f7050c2b3011f0651f84adeb4beabc11e6e7f3
3
+ size 408881
faiss_tutorial.ipynb ADDED
@@ -0,0 +1,574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "c9493399",
7
+ "metadata": {
8
+ "scrolled": false
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "#Install Packages\n",
13
+ "!pip install faiss-cpu\n",
14
+ "!pip install sentence-transformers"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 2,
20
+ "id": "c49be142",
21
+ "metadata": {},
22
+ "outputs": [],
23
+ "source": [
24
+ "# import necessary libraries\n",
25
+ "import pandas as pd\n",
26
+ "pd.set_option('display.max_colwidth', 100)"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 3,
32
+ "id": "f5a30989",
33
+ "metadata": {},
34
+ "outputs": [
35
+ {
36
+ "data": {
37
+ "text/plain": [
38
+ "(8, 2)"
39
+ ]
40
+ },
41
+ "execution_count": 3,
42
+ "metadata": {},
43
+ "output_type": "execute_result"
44
+ }
45
+ ],
46
+ "source": [
47
+ "df = pd.read_csv(\"sample_text.csv\")\n",
48
+ "df.shape"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 49,
54
+ "id": "b72e2ecb",
55
+ "metadata": {
56
+ "scrolled": true
57
+ },
58
+ "outputs": [
59
+ {
60
+ "data": {
61
+ "text/html": [
62
+ "<div>\n",
63
+ "<style scoped>\n",
64
+ " .dataframe tbody tr th:only-of-type {\n",
65
+ " vertical-align: middle;\n",
66
+ " }\n",
67
+ "\n",
68
+ " .dataframe tbody tr th {\n",
69
+ " vertical-align: top;\n",
70
+ " }\n",
71
+ "\n",
72
+ " .dataframe thead th {\n",
73
+ " text-align: right;\n",
74
+ " }\n",
75
+ "</style>\n",
76
+ "<table border=\"1\" class=\"dataframe\">\n",
77
+ " <thead>\n",
78
+ " <tr style=\"text-align: right;\">\n",
79
+ " <th></th>\n",
80
+ " <th>text</th>\n",
81
+ " <th>category</th>\n",
82
+ " </tr>\n",
83
+ " </thead>\n",
84
+ " <tbody>\n",
85
+ " <tr>\n",
86
+ " <th>0</th>\n",
87
+ " <td>Meditation and yoga can improve mental health</td>\n",
88
+ " <td>Health</td>\n",
89
+ " </tr>\n",
90
+ " <tr>\n",
91
+ " <th>1</th>\n",
92
+ " <td>Fruits, whole grains and vegetables helps control blood pressure</td>\n",
93
+ " <td>Health</td>\n",
94
+ " </tr>\n",
95
+ " <tr>\n",
96
+ " <th>2</th>\n",
97
+ " <td>These are the latest fashion trends for this week</td>\n",
98
+ " <td>Fashion</td>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>3</th>\n",
102
+ " <td>Vibrant color jeans for male are becoming a trend</td>\n",
103
+ " <td>Fashion</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>4</th>\n",
107
+ " <td>The concert starts at 7 PM tonight</td>\n",
108
+ " <td>Event</td>\n",
109
+ " </tr>\n",
110
+ " <tr>\n",
111
+ " <th>5</th>\n",
112
+ " <td>Navaratri dandiya program at Expo center in Mumbai this october</td>\n",
113
+ " <td>Event</td>\n",
114
+ " </tr>\n",
115
+ " <tr>\n",
116
+ " <th>6</th>\n",
117
+ " <td>Exciting vacation destinations for your next trip</td>\n",
118
+ " <td>Travel</td>\n",
119
+ " </tr>\n",
120
+ " <tr>\n",
121
+ " <th>7</th>\n",
122
+ " <td>Maldives and Srilanka are gaining popularity in terms of low budget vacation places</td>\n",
123
+ " <td>Travel</td>\n",
124
+ " </tr>\n",
125
+ " </tbody>\n",
126
+ "</table>\n",
127
+ "</div>"
128
+ ],
129
+ "text/plain": [
130
+ " text \\\n",
131
+ "0 Meditation and yoga can improve mental health \n",
132
+ "1 Fruits, whole grains and vegetables helps control blood pressure \n",
133
+ "2 These are the latest fashion trends for this week \n",
134
+ "3 Vibrant color jeans for male are becoming a trend \n",
135
+ "4 The concert starts at 7 PM tonight \n",
136
+ "5 Navaratri dandiya program at Expo center in Mumbai this october \n",
137
+ "6 Exciting vacation destinations for your next trip \n",
138
+ "7 Maldives and Srilanka are gaining popularity in terms of low budget vacation places \n",
139
+ "\n",
140
+ " category \n",
141
+ "0 Health \n",
142
+ "1 Health \n",
143
+ "2 Fashion \n",
144
+ "3 Fashion \n",
145
+ "4 Event \n",
146
+ "5 Event \n",
147
+ "6 Travel \n",
148
+ "7 Travel "
149
+ ]
150
+ },
151
+ "execution_count": 49,
152
+ "metadata": {},
153
+ "output_type": "execute_result"
154
+ }
155
+ ],
156
+ "source": [
157
+ "df"
158
+ ]
159
+ },
160
+ {
161
+ "cell_type": "markdown",
162
+ "id": "2d935944",
163
+ "metadata": {},
164
+ "source": [
165
+ "### Step 1 : Create source embeddings for the text column"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": 5,
171
+ "id": "cd04834b",
172
+ "metadata": {},
173
+ "outputs": [
174
+ {
175
+ "name": "stderr",
176
+ "output_type": "stream",
177
+ "text": [
178
+ "C:\\Users\\dhava\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
179
+ " from .autonotebook import tqdm as notebook_tqdm\n"
180
+ ]
181
+ }
182
+ ],
183
+ "source": [
184
+ "from sentence_transformers import SentenceTransformer"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": 6,
190
+ "id": "03ed4874",
191
+ "metadata": {},
192
+ "outputs": [],
193
+ "source": [
194
+ "encoder = SentenceTransformer(\"all-mpnet-base-v2\")\n",
195
+ "vectors = encoder.encode(df.text)"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": 7,
201
+ "id": "b8b8c1ce",
202
+ "metadata": {},
203
+ "outputs": [
204
+ {
205
+ "data": {
206
+ "text/plain": [
207
+ "(8, 768)"
208
+ ]
209
+ },
210
+ "execution_count": 7,
211
+ "metadata": {},
212
+ "output_type": "execute_result"
213
+ }
214
+ ],
215
+ "source": [
216
+ "vectors.shape"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 8,
222
+ "id": "8e5c7da8",
223
+ "metadata": {},
224
+ "outputs": [
225
+ {
226
+ "data": {
227
+ "text/plain": [
228
+ "768"
229
+ ]
230
+ },
231
+ "execution_count": 8,
232
+ "metadata": {},
233
+ "output_type": "execute_result"
234
+ }
235
+ ],
236
+ "source": [
237
+ "dim = vectors.shape[1]\n",
238
+ "dim"
239
+ ]
240
+ },
241
+ {
242
+ "cell_type": "markdown",
243
+ "id": "149e6b32",
244
+ "metadata": {},
245
+ "source": [
246
+ "### Step 2 : Build a FAISS Index for vectors"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": 9,
252
+ "id": "1033b6bd",
253
+ "metadata": {},
254
+ "outputs": [],
255
+ "source": [
256
+ "import faiss\n",
257
+ "\n",
258
+ "index = faiss.IndexFlatL2(dim)"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "markdown",
263
+ "id": "76ad509d",
264
+ "metadata": {},
265
+ "source": [
266
+ "### Step 3 : Normalize the source vectors (as we are using L2 distance to measure similarity) and add to the index"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": 10,
272
+ "id": "90b527fc",
273
+ "metadata": {},
274
+ "outputs": [],
275
+ "source": [
276
+ "index.add(vectors)"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "code",
281
+ "execution_count": 11,
282
+ "id": "7ac0b8ef",
283
+ "metadata": {},
284
+ "outputs": [
285
+ {
286
+ "data": {
287
+ "text/plain": [
288
+ "<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0x00000252CF6123D0> >"
289
+ ]
290
+ },
291
+ "execution_count": 11,
292
+ "metadata": {},
293
+ "output_type": "execute_result"
294
+ }
295
+ ],
296
+ "source": [
297
+ "index"
298
+ ]
299
+ },
300
+ {
301
+ "cell_type": "markdown",
302
+ "id": "6c42234c",
303
+ "metadata": {},
304
+ "source": [
305
+ "### Step 4 : Encode search text using same encoder and normalize the output vector"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 64,
311
+ "id": "018faf33",
312
+ "metadata": {},
313
+ "outputs": [
314
+ {
315
+ "data": {
316
+ "text/plain": [
317
+ "(768,)"
318
+ ]
319
+ },
320
+ "execution_count": 64,
321
+ "metadata": {},
322
+ "output_type": "execute_result"
323
+ }
324
+ ],
325
+ "source": [
326
+ "search_query = \"I want to buy a polo t-shirt\"\n",
327
+ "# search_query = \"looking for places to visit during the holidays\"\n",
328
+ "# search_query = \"An apple a day keeps the doctor away\"\n",
329
+ "vec = encoder.encode(search_query)\n",
330
+ "vec.shape"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": 66,
336
+ "id": "af05bce3",
337
+ "metadata": {},
338
+ "outputs": [
339
+ {
340
+ "data": {
341
+ "text/plain": [
342
+ "(1, 768)"
343
+ ]
344
+ },
345
+ "execution_count": 66,
346
+ "metadata": {},
347
+ "output_type": "execute_result"
348
+ }
349
+ ],
350
+ "source": [
351
+ "import numpy as np\n",
352
+ "svec = np.array(vec).reshape(1,-1)\n",
353
+ "svec.shape"
354
+ ]
355
+ },
356
+ {
357
+ "cell_type": "code",
358
+ "execution_count": 67,
359
+ "id": "84275adf",
360
+ "metadata": {},
361
+ "outputs": [],
362
+ "source": [
363
+ "# faiss.normalize_L2(svec)"
364
+ ]
365
+ },
366
+ {
367
+ "cell_type": "markdown",
368
+ "id": "90c0cdd8",
369
+ "metadata": {},
370
+ "source": [
371
+ "### Step 5: Search for similar vector in the FAISS index created"
372
+ ]
373
+ },
374
+ {
375
+ "cell_type": "code",
376
+ "execution_count": 68,
377
+ "id": "3d5a0e69",
378
+ "metadata": {},
379
+ "outputs": [
380
+ {
381
+ "data": {
382
+ "text/plain": [
383
+ "array([[1.3844836, 1.4039096]], dtype=float32)"
384
+ ]
385
+ },
386
+ "execution_count": 68,
387
+ "metadata": {},
388
+ "output_type": "execute_result"
389
+ }
390
+ ],
391
+ "source": [
392
+ "distances, I = index.search(svec, k=2)\n",
393
+ "distances"
394
+ ]
395
+ },
396
+ {
397
+ "cell_type": "code",
398
+ "execution_count": 69,
399
+ "id": "7ef978ca",
400
+ "metadata": {},
401
+ "outputs": [
402
+ {
403
+ "data": {
404
+ "text/plain": [
405
+ "array([[3, 2]], dtype=int64)"
406
+ ]
407
+ },
408
+ "execution_count": 69,
409
+ "metadata": {},
410
+ "output_type": "execute_result"
411
+ }
412
+ ],
413
+ "source": [
414
+ "I"
415
+ ]
416
+ },
417
+ {
418
+ "cell_type": "code",
419
+ "execution_count": 70,
420
+ "id": "e2fceefd",
421
+ "metadata": {},
422
+ "outputs": [
423
+ {
424
+ "data": {
425
+ "text/plain": [
426
+ "[[3, 2]]"
427
+ ]
428
+ },
429
+ "execution_count": 70,
430
+ "metadata": {},
431
+ "output_type": "execute_result"
432
+ }
433
+ ],
434
+ "source": [
435
+ "I.tolist()"
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "code",
440
+ "execution_count": 71,
441
+ "id": "68f88083",
442
+ "metadata": {},
443
+ "outputs": [
444
+ {
445
+ "data": {
446
+ "text/plain": [
447
+ "[3, 2]"
448
+ ]
449
+ },
450
+ "execution_count": 71,
451
+ "metadata": {},
452
+ "output_type": "execute_result"
453
+ }
454
+ ],
455
+ "source": [
456
+ "row_indices = I.tolist()[0]\n",
457
+ "row_indices"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "code",
462
+ "execution_count": 72,
463
+ "id": "d856895d",
464
+ "metadata": {
465
+ "scrolled": true
466
+ },
467
+ "outputs": [
468
+ {
469
+ "data": {
470
+ "text/html": [
471
+ "<div>\n",
472
+ "<style scoped>\n",
473
+ " .dataframe tbody tr th:only-of-type {\n",
474
+ " vertical-align: middle;\n",
475
+ " }\n",
476
+ "\n",
477
+ " .dataframe tbody tr th {\n",
478
+ " vertical-align: top;\n",
479
+ " }\n",
480
+ "\n",
481
+ " .dataframe thead th {\n",
482
+ " text-align: right;\n",
483
+ " }\n",
484
+ "</style>\n",
485
+ "<table border=\"1\" class=\"dataframe\">\n",
486
+ " <thead>\n",
487
+ " <tr style=\"text-align: right;\">\n",
488
+ " <th></th>\n",
489
+ " <th>text</th>\n",
490
+ " <th>category</th>\n",
491
+ " </tr>\n",
492
+ " </thead>\n",
493
+ " <tbody>\n",
494
+ " <tr>\n",
495
+ " <th>3</th>\n",
496
+ " <td>Vibrant color jeans for male are becoming a trend</td>\n",
497
+ " <td>Fashion</td>\n",
498
+ " </tr>\n",
499
+ " <tr>\n",
500
+ " <th>2</th>\n",
501
+ " <td>These are the latest fashion trends for this week</td>\n",
502
+ " <td>Fashion</td>\n",
503
+ " </tr>\n",
504
+ " </tbody>\n",
505
+ "</table>\n",
506
+ "</div>"
507
+ ],
508
+ "text/plain": [
509
+ " text category\n",
510
+ "3 Vibrant color jeans for male are becoming a trend Fashion\n",
511
+ "2 These are the latest fashion trends for this week Fashion"
512
+ ]
513
+ },
514
+ "execution_count": 72,
515
+ "metadata": {},
516
+ "output_type": "execute_result"
517
+ }
518
+ ],
519
+ "source": [
520
+ "df.loc[row_indices]"
521
+ ]
522
+ },
523
+ {
524
+ "cell_type": "code",
525
+ "execution_count": 73,
526
+ "id": "b65050a9",
527
+ "metadata": {},
528
+ "outputs": [
529
+ {
530
+ "data": {
531
+ "text/plain": [
532
+ "'I want to buy a polo t-shirt'"
533
+ ]
534
+ },
535
+ "execution_count": 73,
536
+ "metadata": {},
537
+ "output_type": "execute_result"
538
+ }
539
+ ],
540
+ "source": [
541
+ "search_query"
542
+ ]
543
+ },
544
+ {
545
+ "cell_type": "markdown",
546
+ "id": "e066c78d",
547
+ "metadata": {},
548
+ "source": [
549
+ "You can see that the two results from the dataframe are similar to the search_query"
550
+ ]
551
+ }
552
+ ],
553
+ "metadata": {
554
+ "kernelspec": {
555
+ "display_name": "Python 3 (ipykernel)",
556
+ "language": "python",
557
+ "name": "python3"
558
+ },
559
+ "language_info": {
560
+ "codemirror_mode": {
561
+ "name": "ipython",
562
+ "version": 3
563
+ },
564
+ "file_extension": ".py",
565
+ "mimetype": "text/x-python",
566
+ "name": "python",
567
+ "nbconvert_exporter": "python",
568
+ "pygments_lexer": "ipython3",
569
+ "version": "3.10.11"
570
+ }
571
+ },
572
+ "nbformat": 4,
573
+ "nbformat_minor": 5
574
+ }
nvda_news_1.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The stock of NVIDIA Corp (NASDAQ:NVDA) experienced a daily loss of -3.56% and a 3-month gain of 32.35%. With an Earnings Per Share (EPS) of $1.92, the question arises: is the stock significantly overvalued? This article aims to provide a detailed valuation analysis of NVIDIA, offering insights into its financial strength, profitability, growth, and more. We invite you to delve into this comprehensive analysis.
2
+
3
+ Company Overview
4
+ Warning! GuruFocus has detected 10 Warning Signs with NVDA. Click here to check it out.
5
+
6
+ NVDA 30-Year Financial Data
7
+
8
+ The intrinsic value of NVDA
9
+
10
+
11
+ NVIDIA Corp (NASDAQ:NVDA) is a leading designer of discrete graphics processing units that enhance the experience on computing platforms. The firm's chips are widely used in various end markets, including PC gaming and data centers. In recent years, NVIDIA has broadened its focus from traditional PC graphics applications such as gaming to more complex and favorable opportunities, including artificial intelligence and autonomous driving, leveraging the high-performance capabilities of its products.
12
+
13
+ Currently, NVIDIA's stock price stands at $418.01, significantly higher than the GF Value of $310.28, indicating the stock might be overvalued. With a market cap of $1 trillion, the valuation seems steep. The following analysis aims to delve deeper into the company's value.
14
+
15
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
16
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
17
+ Understanding the GF Value
18
+ The GF Value is a unique measure of the intrinsic value of a stock, calculated based on historical trading multiples, a GuruFocus adjustment factor, and future business performance estimates. If the stock price is significantly above the GF Value Line, it is overvalued, and its future return is likely to be poor. Conversely, if it is significantly below the GF Value Line, its future return will likely be higher.
19
+
20
+ According to GuruFocus Value calculation, NVIDIA (NASDAQ:NVDA) appears to be significantly overvalued. The stock's current price of $418.01 per share and the market cap of $1 trillion further strengthen this assumption.
21
+
22
+ Given that NVIDIA is significantly overvalued, the long-term return of its stock is likely to be much lower than its future business growth.
23
+
24
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
25
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
26
+ Link: These companies may deliver higher future returns at reduced risk.
27
+
28
+ Financial Strength of NVIDIA
29
+ Examining the financial strength of a company is crucial before investing in its stock. Companies with poor financial strength pose a higher risk of permanent loss. NVIDIA's cash-to-debt ratio of 1.27 is worse than 58.04% of companies in the Semiconductors industry. However, NVIDIA's overall financial strength is 8 out of 10, indicating a strong financial position.
30
+
31
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
32
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
33
+ Profitability and Growth
34
+ Consistent profitability over the long term reduces the risk for investors. NVIDIA, with its profitability ranking of 10 out of 10, has been profitable for the past 10 years. The company's operating margin of 17.37% ranks better than 76.5% of companies in the Semiconductors industry.
35
+
36
+ However, growth is a crucial factor in a company's valuation. NVIDIA's growth ranks worse than 52.99% of companies in the Semiconductors industry, with its 3-year average revenue growth rate better than 87.88% of companies in the industry.
37
+
38
+ ROIC vs WACC
39
+ Comparing a company's return on invested capital (ROIC) to its weighted average cost of capital (WACC) is an effective way to evaluate its profitability. Over the past 12 months, NVIDIA's ROIC was 20.32 while its WACC was 16.74, suggesting that the company is creating value for its shareholders.
40
+
41
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
42
+ Is NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis
43
+ Conclusion
44
+ In conclusion, NVIDIA (NASDAQ:NVDA) appears to be significantly overvalued. Despite its strong financial condition and profitability, its growth ranks lower than 52.99% of companies in the Semiconductors industry. To learn more about NVIDIA stock, you can check out its 30-Year Financials here.
45
+
46
+ To find out the high quality companies that may deliver above-average returns, please check out GuruFocus High Quality Low Capex Screener.
47
+
48
+ This article first appeared on GuruFocus.
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.0.284
2
+ python-dotenv==1.0.0
3
+ streamlit==1.22.0
4
+ unstructured==0.9.2
5
+ tiktoken==0.4.0
6
+ faiss-cpu==1.7.4
7
+ libmagic==1.0
8
+ python-magic==0.4.27
9
+ python-magic-bin==0.4.14
10
+ openai==0.28.0
retrieval.ipynb ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "id": "0c2aff87",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import os\n",
11
+ "import streamlit as st\n",
12
+ "import pickle\n",
13
+ "import time\n",
14
+ "import langchain\n",
15
+ "from langchain import OpenAI\n",
16
+ "from langchain.chains import RetrievalQAWithSourcesChain\n",
17
+ "from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain\n",
18
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
19
+ "from langchain.document_loaders import UnstructuredURLLoader\n",
20
+ "from langchain.embeddings import OpenAIEmbeddings\n",
21
+ "from langchain.vectorstores import FAISS"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 6,
27
+ "id": "80fc5e57",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "#load openAI api key\n",
32
+ "os.environ['OPENAI_API_KEY'] = '<YOUR_OPENAI_API_KEY>'"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 4,
38
+ "id": "39e721c4",
39
+ "metadata": {},
40
+ "outputs": [],
41
+ "source": [
42
+ "# Initialise LLM with required params\n",
43
+ "llm = OpenAI(temperature=0.9, max_tokens=500) "
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "markdown",
48
+ "id": "bd0c3ff7",
49
+ "metadata": {},
50
+ "source": [
51
+ "### (1) Load data"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 5,
57
+ "id": "55fa0ef5",
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/plain": [
63
+ "2"
64
+ ]
65
+ },
66
+ "execution_count": 5,
67
+ "metadata": {},
68
+ "output_type": "execute_result"
69
+ }
70
+ ],
71
+ "source": [
72
+ "loaders = UnstructuredURLLoader(urls=[\n",
73
+ " \"https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html\",\n",
74
+ " \"https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\"\n",
75
+ "])\n",
76
+ "data = loaders.load() \n",
77
+ "len(data)"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "markdown",
82
+ "id": "9f51a5bd",
83
+ "metadata": {},
84
+ "source": [
85
+ "### (2) Split data to create chunks"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 7,
91
+ "id": "054a6361",
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "text_splitter = RecursiveCharacterTextSplitter(\n",
96
+ " chunk_size=1000,\n",
97
+ " chunk_overlap=200\n",
98
+ ")\n",
99
+ "\n",
100
+ "# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.\n",
101
+ "docs = text_splitter.split_documents(data)"
102
+ ]
103
+ },
104
+ {
105
+ "cell_type": "code",
106
+ "execution_count": 8,
107
+ "id": "379e3d94",
108
+ "metadata": {},
109
+ "outputs": [
110
+ {
111
+ "data": {
112
+ "text/plain": [
113
+ "41"
114
+ ]
115
+ },
116
+ "execution_count": 8,
117
+ "metadata": {},
118
+ "output_type": "execute_result"
119
+ }
120
+ ],
121
+ "source": [
122
+ "len(docs)"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 9,
128
+ "id": "637ee7ae",
129
+ "metadata": {
130
+ "scrolled": true
131
+ },
132
+ "outputs": [
133
+ {
134
+ "data": {
135
+ "text/plain": [
136
+ "Document(page_content='English\\n\\nHindi\\n\\nGujarati\\n\\nSpecials\\n\\nTrending Stocks\\n\\nIRFC\\xa0INE053F01010, IRFC, 543257\\n\\nTata Power\\xa0INE245A01021, TATAPOWER, 500400\\n\\nRail Vikas\\xa0INE415G01027, RVNL, 542649\\n\\nJio Financial\\xa0INE758E01017, JIOFIN, 543940\\n\\nSuzlon Energy\\xa0INE040H01021, SUZLON, 532667\\n\\nCheck your Credit Score here!\\n\\nQuotes\\n\\nMutual Funds\\n\\nCommodities\\n\\nFutures & Options\\n\\nCurrency\\n\\nNews\\n\\nCryptocurrency\\n\\nForum\\n\\nNotices\\n\\nVideos\\n\\nGlossary\\n\\nAll\\n\\nHello, LoginHello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistMy Credit ScoreMy MessagesMy AlertsMy Profile My PROMy PortfolioMy WatchlistMy Credit ScoreMy MessagesMy AlertsLogoutChat with UsDownload AppFollow us on:\\n\\nUpgrade', metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'})"
137
+ ]
138
+ },
139
+ "execution_count": 9,
140
+ "metadata": {},
141
+ "output_type": "execute_result"
142
+ }
143
+ ],
144
+ "source": [
145
+ "docs[0]"
146
+ ]
147
+ },
148
+ {
149
+ "cell_type": "markdown",
150
+ "id": "9e35a876",
151
+ "metadata": {},
152
+ "source": [
153
+ "### (3) Create embeddings for these chunks and save them to FAISS index"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 12,
159
+ "id": "c3d0a6dd",
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "# Create the embeddings of the chunks using openAIEmbeddings\n",
164
+ "embeddings = OpenAIEmbeddings()\n",
165
+ "\n",
166
+ "# Pass the documents and embeddings in order to create FAISS vector index\n",
167
+ "vectorindex_openai = FAISS.from_documents(docs, embeddings)"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 14,
173
+ "id": "a9686c13",
174
+ "metadata": {},
175
+ "outputs": [],
176
+ "source": [
177
+ "# Store the created vector index locally\n",
178
+ "file_path=\"vector_index.pkl\"\n",
179
+ "with open(file_path, \"wb\") as f:\n",
180
+ " pickle.dump(vectorindex_openai, f)"
181
+ ]
182
+ },
183
+ {
184
+ "cell_type": "code",
185
+ "execution_count": 15,
186
+ "id": "688dc29b",
187
+ "metadata": {},
188
+ "outputs": [],
189
+ "source": [
190
+ "if os.path.exists(file_path):\n",
191
+ " with open(file_path, \"rb\") as f:\n",
192
+ " vectorIndex = pickle.load(f)"
193
+ ]
194
+ },
195
+ {
196
+ "cell_type": "markdown",
197
+ "id": "fbd96296",
198
+ "metadata": {},
199
+ "source": [
200
+ "### (4) Retrieve similar embeddings for a given question and call LLM to retrieve final answer"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "code",
205
+ "execution_count": 18,
206
+ "id": "01f5e1e8",
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "data": {
211
+ "text/plain": [
212
+ "RetrievalQAWithSourcesChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, combine_documents_chain=MapReduceDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, prompt=PromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, template='Use the following portion of a long document to see if any of the text is relevant to answer the question. \\nReturn any relevant text verbatim.\\n{context}\\nQuestion: {question}\\nRelevant text, if any:', template_format='f-string', validate_template=True), llm=OpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.completion.Completion'>, model_name='text-davinci-003', temperature=0.9, max_tokens=500, top_p=1, frequency_penalty=0, presence_penalty=0, n=1, best_of=1, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', batch_size=20, request_timeout=None, logit_bias={}, max_retries=6, streaming=False, allowed_special=set(), disallowed_special='all', tiktoken_model_name=None), output_key='text', output_parser=StrOutputParser(), return_final_only=True, llm_kwargs={}), reduce_documents_chain=ReduceDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', combine_documents_chain=StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, tags=None, metadata=None, 
prompt=PromptTemplate(input_variables=['summaries', 'question'], output_parser=None, partial_variables={}, template='Given the following extracted parts of a long document and a question, create a final answer with references (\"SOURCES\"). \\nIf you don\\'t know the answer, just say that you don\\'t know. Don\\'t try to make up an answer.\\nALWAYS return a \"SOURCES\" part in your answer.\\n\\nQUESTION: Which state/country\\'s law governs the interpretation of the contract?\\n=========\\nContent: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights.\\nSource: 28-pl\\nContent: No Waiver. Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other) right or remedy.\\n\\n11.7 Severability. The invalidity, illegality or unenforceability of any term (or part of a term) of this Agreement shall not affect the continuation in force of the remainder of the term (if any) and this Agreement.\\n\\n11.8 No Agency. Except as expressly stated otherwise, nothing in this Agreement shall create an agency, partnership or joint venture of any kind between the parties.\\n\\n11.9 No Third-Party Beneficiaries.\\nSource: 30-pl\\nContent: (b) if Google believes, in good faith, that the Distributor has violated or caused Google to violate any Anti-Bribery Laws (as defined in Clause 8.5) or that such a violation is reasonably likely to occur,\\nSource: 4-pl\\n=========\\nFINAL ANSWER: This Agreement is governed by English law.\\nSOURCES: 28-pl\\n\\nQUESTION: What did the president say about Michael Jackson?\\n=========\\nContent: Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. 
Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \\n\\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland.\\nSource: 0-pl\\nContent: And we won’t stop. \\n\\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \\n\\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \\n\\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. 
\\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves.\\nSource: 24-pl\\nContent: And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \\n\\nTo all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \\n\\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \\n\\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \\n\\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \\n\\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \\n\\nBut I want you to know that we are going to be okay.\\nSource: 5-pl\\nContent: More support for patients and families. \\n\\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \\n\\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \\n\\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \\n\\nA unity agenda for the nation. \\n\\nWe can do this. \\n\\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \\n\\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. 
\\n\\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \\n\\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \\n\\nNow is the hour. \\n\\nOur moment of responsibility. \\n\\nOur test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \\n\\nWell I know this nation.\\nSource: 34-pl\\n=========\\nFINAL ANSWER: The president did not mention Michael Jackson.\\nSOURCES:\\n\\nQUESTION: {question}\\n=========\\n{summaries}\\n=========\\nFINAL ANSWER:', template_format='f-string', validate_template=True), llm=OpenAI(cache=None, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, client=<class 'openai.api_resources.completion.Completion'>, model_name='text-davinci-003', temperature=0.9, max_tokens=500, top_p=1, frequency_penalty=0, presence_penalty=0, n=1, best_of=1, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', batch_size=20, request_timeout=None, logit_bias={}, max_retries=6, streaming=False, allowed_special=set(), disallowed_special='all', tiktoken_model_name=None), output_key='text', output_parser=StrOutputParser(), return_final_only=True, llm_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content', 'source'], output_parser=None, partial_variables={}, template='Content: {page_content}\\nSource: {source}', template_format='f-string', validate_template=True), document_variable_name='summaries', document_separator='\\n\\n'), collapse_documents_chain=None, token_max=3000), document_variable_name='context', return_intermediate_steps=False), question_key='question', input_docs_key='docs', answer_key='answer', sources_answer_key='sources', return_source_documents=False, retriever=VectorStoreRetriever(tags=['FAISS'], metadata=None, 
vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x000001E36D3E27D0>, search_type='similarity', search_kwargs={}), reduce_k_below_max_tokens=False, max_tokens_limit=3375)"
213
+ ]
214
+ },
215
+ "execution_count": 18,
216
+ "metadata": {},
217
+ "output_type": "execute_result"
218
+ }
219
+ ],
220
+ "source": [
221
+ "chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=vectorIndex.as_retriever())\n",
222
+ "chain"
223
+ ]
224
+ },
225
+ {
226
+ "cell_type": "code",
227
+ "execution_count": 26,
228
+ "id": "8c2e228b",
229
+ "metadata": {
230
+ "scrolled": false
231
+ },
232
+ "outputs": [
233
+ {
234
+ "name": "stdout",
235
+ "output_type": "stream",
236
+ "text": [
237
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain] Entering Chain run with input:\n",
238
+ "\u001b[0m{\n",
239
+ " \"question\": \"what is the price of Tiago iCNG?\"\n",
240
+ "}\n",
241
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain] Entering Chain run with input:\n",
242
+ "\u001b[0m[inputs]\n",
243
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:\n",
244
+ "\u001b[0m{\n",
245
+ " \"input_list\": [\n",
246
+ " {\n",
247
+ " \"context\": \"The company also said it has also introduced the twin-cylinder technology on its Tiago and Tigor models.\\n\\nThe Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh, while the Tigor iCNG comes at a price range of Rs 7.8 lakh to Rs 8.95 lakh.\\n\\nTata Motors Passenger Vehicles Ltd Head-Marketing, Vinay Pant said these introductions put together will make the company's CNG line up \\\"appealing, holistic, and stronger than ever\\\".\\n\\nPTI\\n\\nTags:\\n\\n#Business\\n\\n#Companies\\n\\nfirst published: Aug 4, 2023 02:17 pm\\n\\nbusiness news,\\n\\nSensex, and\\n\\nNifty updates. Obtain\\n\\nPersonal Finance insights, tax queries, and expert opinions on\\n\\nMoneycontrol or download the\\n\\nMoneycontrol App to stay updated!\\n\\nForum\\n\\nFacebook\\n\\nTwitter\\n\\nInstagram\\n\\nLinkedin\\n\\nRSS\\n\\nPortfolio\\n\\nMarkets\\n\\nWatchlist\\n\\nLive TV Show\\n\\nCurrencies\\n\\nCredit Score\\n\\nCommodities\\n\\nFixed Income\\n\\nPersonal Finance\\n\\nMutual Fund\\n\\nPre-Market\\n\\nIPO\\n\\nGlobal Market\\n\\nBudget 2023\\n\\nGold Rate\\n\\nBSE Sensex\\n\\nForum\\n\\nMC 30\\n\\nNews\\n\\nBusiness\\n\\nMarkets\\n\\nStocks\\n\\nEconomy\",\n",
248
+ " \"question\": \"what is the price of Tiago iCNG?\"\n",
249
+ " },\n",
250
+ " {\n",
251
+ " \"context\": \"Tata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\n\\nWatchlist\\n\\nPortfolio\\n\\nMessage\\n\\nSet Alert\\n\\nlive\\n\\nbselive\\n\\nnselive\\n\\nVolume \\n\\nTodays L/H \\n\\nMore\\n\\nTata Motors on Friday launched the CNG variant of its micro SUV Punch priced between Rs 7.1 lakh and Rs 9.68 lakh (ex-showroom, Delhi).\\n\\nThe Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a micro-switch to keep the car switched off at the time of refuelling and thermal incident protection that cuts off CNG supply to the engine and releases gas into the atmosphere, Tata Motors said in a statement.\\n\\nIt is also equipped with other features such as voice assisted electric sunroof, automatic projector headlamps, LED DRLs, 16-inch diamond cut alloy wheels, 7-inch infotainment system by Harman that supports Android Auto and Apple Carplay connectivity, rain sensing wipers and height adjustable driver seat.\",\n",
252
+ " \"question\": \"what is the price of Tiago iCNG?\"\n",
253
+ " },\n",
254
+ " {\n",
255
+ " \"context\": \"Be a PRO\\n\\nBusiness\\n\\nMarkets\\n\\nStocks\\n\\nEconomy\\n\\nCompanies\\n\\nTrends\\n\\nIPO\\n\\nOpinion\\n\\nEV Special\\n\\nVisa Expert:\\n\\nGet instant updates on the latest news in the immigration world right at your fingertips with Visa Expert. Click Here!\\n\\nyou are here:\\n\\nHome\\n\\nNews\\n\\nBusiness\\n\\nTata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\n\\nThe Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a micro-switch to keep the car switched off at the time of refuelling and thermal incident protection that cuts off CNG supply to the engine and releases gas into the atmosphere, Tata Motors said in a statement.\\n\\nPTI\\n\\nAugust 04, 2023 / 02:17 PM IST\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\n\\nWatchlist\\n\\nPortfolio\\n\\nMessage\\n\\nSet Alert\\n\\nlive\\n\\nbselive\\n\\nnselive\\n\\nVolume \\n\\nTodays L/H \\n\\nMore\",\n",
256
+ " \"question\": \"what is the price of Tiago iCNG?\"\n",
257
+ " },\n",
258
+ " {\n",
259
+ " \"context\": \"English\\n\\nHindi\\n\\nGujarati\\n\\nSpecials\\n\\nTrending Stocks\\n\\nIRFC INE053F01010, IRFC, 543257\\n\\nTata Power INE245A01021, TATAPOWER, 500400\\n\\nRail Vikas INE415G01027, RVNL, 542649\\n\\nJio Financial INE758E01017, JIOFIN, 543940\\n\\nSuzlon Energy INE040H01021, SUZLON, 532667\\n\\nCheck your Credit Score here!\\n\\nQuotes\\n\\nMutual Funds\\n\\nCommodities\\n\\nFutures & Options\\n\\nCurrency\\n\\nNews\\n\\nCryptocurrency\\n\\nForum\\n\\nNotices\\n\\nVideos\\n\\nGlossary\\n\\nAll\\n\\nHello, LoginHello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistMy Credit ScoreMy MessagesMy AlertsMy Profile My PROMy PortfolioMy WatchlistMy Credit ScoreMy MessagesMy AlertsLogoutChat with UsDownload AppFollow us on:\\n\\nUpgrade\",\n",
260
+ " \"question\": \"what is the price of Tiago iCNG?\"\n",
261
+ " }\n",
262
+ " ]\n",
263
+ "}\n",
264
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 5:llm:OpenAI] Entering LLM run with input:\n",
265
+ "\u001b[0m{\n",
266
+ " \"prompts\": [\n",
267
+ " \"Use the following portion of a long document to see if any of the text is relevant to answer the question. \\nReturn any relevant text verbatim.\\nThe company also said it has also introduced the twin-cylinder technology on its Tiago and Tigor models.\\n\\nThe Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh, while the Tigor iCNG comes at a price range of Rs 7.8 lakh to Rs 8.95 lakh.\\n\\nTata Motors Passenger Vehicles Ltd Head-Marketing, Vinay Pant said these introductions put together will make the company's CNG line up \\\"appealing, holistic, and stronger than ever\\\".\\n\\nPTI\\n\\nTags:\\n\\n#Business\\n\\n#Companies\\n\\nfirst published: Aug 4, 2023 02:17 pm\\n\\nbusiness news,\\n\\nSensex, and\\n\\nNifty updates. Obtain\\n\\nPersonal Finance insights, tax queries, and expert opinions on\\n\\nMoneycontrol or download the\\n\\nMoneycontrol App to stay updated!\\n\\nForum\\n\\nFacebook\\n\\nTwitter\\n\\nInstagram\\n\\nLinkedin\\n\\nRSS\\n\\nPortfolio\\n\\nMarkets\\n\\nWatchlist\\n\\nLive TV Show\\n\\nCurrencies\\n\\nCredit Score\\n\\nCommodities\\n\\nFixed Income\\n\\nPersonal Finance\\n\\nMutual Fund\\n\\nPre-Market\\n\\nIPO\\n\\nGlobal Market\\n\\nBudget 2023\\n\\nGold Rate\\n\\nBSE Sensex\\n\\nForum\\n\\nMC 30\\n\\nNews\\n\\nBusiness\\n\\nMarkets\\n\\nStocks\\n\\nEconomy\\nQuestion: what is the price of Tiago iCNG?\\nRelevant text, if any:\"\n",
268
+ " ]\n",
269
+ "}\n",
270
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 6:llm:OpenAI] Entering LLM run with input:\n",
271
+ "\u001b[0m{\n",
272
+ " \"prompts\": [\n",
273
+ " \"Use the following portion of a long document to see if any of the text is relevant to answer the question. \\nReturn any relevant text verbatim.\\nTata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\n\\nWatchlist\\n\\nPortfolio\\n\\nMessage\\n\\nSet Alert\\n\\nlive\\n\\nbselive\\n\\nnselive\\n\\nVolume \\n\\nTodays L/H \\n\\nMore\\n\\nTata Motors on Friday launched the CNG variant of its micro SUV Punch priced between Rs 7.1 lakh and Rs 9.68 lakh (ex-showroom, Delhi).\\n\\nThe Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a micro-switch to keep the car switched off at the time of refuelling and thermal incident protection that cuts off CNG supply to the engine and releases gas into the atmosphere, Tata Motors said in a statement.\\n\\nIt is also equipped with other features such as voice assisted electric sunroof, automatic projector headlamps, LED DRLs, 16-inch diamond cut alloy wheels, 7-inch infotainment system by Harman that supports Android Auto and Apple Carplay connectivity, rain sensing wipers and height adjustable driver seat.\\nQuestion: what is the price of Tiago iCNG?\\nRelevant text, if any:\"\n",
274
+ " ]\n",
275
+ "}\n",
276
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 7:llm:OpenAI] Entering LLM run with input:\n",
277
+ "\u001b[0m{\n",
278
+ " \"prompts\": [\n",
279
+ " \"Use the following portion of a long document to see if any of the text is relevant to answer the question. \\nReturn any relevant text verbatim.\\nBe a PRO\\n\\nBusiness\\n\\nMarkets\\n\\nStocks\\n\\nEconomy\\n\\nCompanies\\n\\nTrends\\n\\nIPO\\n\\nOpinion\\n\\nEV Special\\n\\nVisa Expert:\\n\\nGet instant updates on the latest news in the immigration world right at your fingertips with Visa Expert. Click Here!\\n\\nyou are here:\\n\\nHome\\n\\nNews\\n\\nBusiness\\n\\nTata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\n\\nThe Punch iCNG is equipped with the company's proprietary twin-cylinder technology with enhanced safety features like a micro-switch to keep the car switched off at the time of refuelling and thermal incident protection that cuts off CNG supply to the engine and releases gas into the atmosphere, Tata Motors said in a statement.\\n\\nPTI\\n\\nAugust 04, 2023 / 02:17 PM IST\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\n\\nWatchlist\\n\\nPortfolio\\n\\nMessage\\n\\nSet Alert\\n\\nlive\\n\\nbselive\\n\\nnselive\\n\\nVolume \\n\\nTodays L/H \\n\\nMore\\nQuestion: what is the price of Tiago iCNG?\\nRelevant text, if any:\"\n",
280
+ " ]\n",
281
+ "}\n",
282
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 8:llm:OpenAI] Entering LLM run with input:\n",
283
+ "\u001b[0m{\n",
284
+ " \"prompts\": [\n",
285
+ " \"Use the following portion of a long document to see if any of the text is relevant to answer the question. \\nReturn any relevant text verbatim.\\nEnglish\\n\\nHindi\\n\\nGujarati\\n\\nSpecials\\n\\nTrending Stocks\\n\\nIRFC INE053F01010, IRFC, 543257\\n\\nTata Power INE245A01021, TATAPOWER, 500400\\n\\nRail Vikas INE415G01027, RVNL, 542649\\n\\nJio Financial INE758E01017, JIOFIN, 543940\\n\\nSuzlon Energy INE040H01021, SUZLON, 532667\\n\\nCheck your Credit Score here!\\n\\nQuotes\\n\\nMutual Funds\\n\\nCommodities\\n\\nFutures & Options\\n\\nCurrency\\n\\nNews\\n\\nCryptocurrency\\n\\nForum\\n\\nNotices\\n\\nVideos\\n\\nGlossary\\n\\nAll\\n\\nHello, LoginHello, LoginLog-inor Sign-UpMy AccountMy Profile My PortfolioMy WatchlistMy Credit ScoreMy MessagesMy AlertsMy Profile My PROMy PortfolioMy WatchlistMy Credit ScoreMy MessagesMy AlertsLogoutChat with UsDownload AppFollow us on:\\n\\nUpgrade\\nQuestion: what is the price of Tiago iCNG?\\nRelevant text, if any:\"\n",
286
+ " ]\n",
287
+ "}\n"
288
+ ]
289
+ },
290
+ {
291
+ "name": "stdout",
292
+ "output_type": "stream",
293
+ "text": [
294
+ "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 5:llm:OpenAI] [1.01s] Exiting LLM run with output:\n",
295
+ "\u001b[0m{\n",
296
+ " \"generations\": [\n",
297
+ " [\n",
298
+ " {\n",
299
+ " \"text\": \" The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\",\n",
300
+ " \"generation_info\": {\n",
301
+ " \"finish_reason\": \"stop\",\n",
302
+ " \"logprobs\": null\n",
303
+ " }\n",
304
+ " }\n",
305
+ " ]\n",
306
+ " ],\n",
307
+ " \"llm_output\": {\n",
308
+ " \"token_usage\": {\n",
309
+ " \"total_tokens\": 1343,\n",
310
+ " \"prompt_tokens\": 1269,\n",
311
+ " \"completion_tokens\": 74\n",
312
+ " },\n",
313
+ " \"model_name\": \"text-davinci-003\"\n",
314
+ " },\n",
315
+ " \"run\": null\n",
316
+ "}\n",
317
+ "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 6:llm:OpenAI] [1.01s] Exiting LLM run with output:\n",
318
+ "\u001b[0m{\n",
319
+ " \"generations\": [\n",
320
+ " [\n",
321
+ " {\n",
322
+ " \"text\": \" Tata Motors on Friday launched the CNG variant of its micro SUV Punch priced between Rs 7.1 lakh and Rs 9.68 lakh (ex-showroom, Delhi).\",\n",
323
+ " \"generation_info\": {\n",
324
+ " \"finish_reason\": \"stop\",\n",
325
+ " \"logprobs\": null\n",
326
+ " }\n",
327
+ " }\n",
328
+ " ]\n",
329
+ " ],\n",
330
+ " \"llm_output\": {\n",
331
+ " \"token_usage\": {},\n",
332
+ " \"model_name\": \"text-davinci-003\"\n",
333
+ " },\n",
334
+ " \"run\": null\n",
335
+ "}\n",
336
+ "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 7:llm:OpenAI] [1.01s] Exiting LLM run with output:\n",
337
+ "\u001b[0m{\n",
338
+ " \"generations\": [\n",
339
+ " [\n",
340
+ " {\n",
341
+ " \"text\": \" Tata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\",\n",
342
+ " \"generation_info\": {\n",
343
+ " \"finish_reason\": \"stop\",\n",
344
+ " \"logprobs\": null\n",
345
+ " }\n",
346
+ " }\n",
347
+ " ]\n",
348
+ " ],\n",
349
+ " \"llm_output\": {\n",
350
+ " \"token_usage\": {},\n",
351
+ " \"model_name\": \"text-davinci-003\"\n",
352
+ " },\n",
353
+ " \"run\": null\n",
354
+ "}\n",
355
+ "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain > 8:llm:OpenAI] [1.01s] Exiting LLM run with output:\n",
356
+ "\u001b[0m{\n",
357
+ " \"generations\": [\n",
358
+ " [\n",
359
+ " {\n",
360
+ " \"text\": \" None\",\n",
361
+ " \"generation_info\": {\n",
362
+ " \"finish_reason\": \"stop\",\n",
363
+ " \"logprobs\": null\n",
364
+ " }\n",
365
+ " }\n",
366
+ " ]\n",
367
+ " ],\n",
368
+ " \"llm_output\": {\n",
369
+ " \"token_usage\": {},\n",
370
+ " \"model_name\": \"text-davinci-003\"\n",
371
+ " },\n",
372
+ " \"run\": null\n",
373
+ "}\n",
374
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 4:chain:LLMChain] [1.02s] Exiting Chain run with output:\n",
375
+ "\u001b[0m{\n",
376
+ " \"outputs\": [\n",
377
+ " {\n",
378
+ " \"text\": \" The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\"\n",
379
+ " },\n",
380
+ " {\n",
381
+ " \"text\": \" Tata Motors on Friday launched the CNG variant of its micro SUV Punch priced between Rs 7.1 lakh and Rs 9.68 lakh (ex-showroom, Delhi).\"\n",
382
+ " },\n",
383
+ " {\n",
384
+ " \"text\": \" Tata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\"\n",
385
+ " },\n",
386
+ " {\n",
387
+ " \"text\": \" None\"\n",
388
+ " }\n",
389
+ " ]\n",
390
+ "}\n",
391
+ "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain] Entering Chain run with input:\n",
392
+ "\u001b[0m{\n",
393
+ " \"question\": \"what is the price of Tiago iCNG?\",\n",
394
+ " \"summaries\": \"Content: The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\\n\\nContent: Tata Motors on Friday launched the CNG variant of its micro SUV Punch priced between Rs 7.1 lakh and Rs 9.68 lakh (ex-showroom, Delhi).\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\\n\\nContent: Tata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\\n\\nContent: None\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\"\n",
395
+ "}\n",
396
+ "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain > 10:llm:OpenAI] Entering LLM run with input:\n",
397
+ "\u001b[0m{\n",
398
+ " \"prompts\": [\n",
399
+ " \"Given the following extracted parts of a long document and a question, create a final answer with references (\\\"SOURCES\\\"). \\nIf you don't know the answer, just say that you don't know. Don't try to make up an answer.\\nALWAYS return a \\\"SOURCES\\\" part in your answer.\\n\\nQUESTION: Which state/country's law governs the interpretation of the contract?\\n=========\\nContent: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an injunction or other relief to protect its Intellectual Property Rights.\\nSource: 28-pl\\nContent: No Waiver. Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other) right or remedy.\\n\\n11.7 Severability. The invalidity, illegality or unenforceability of any term (or part of a term) of this Agreement shall not affect the continuation in force of the remainder of the term (if any) and this Agreement.\\n\\n11.8 No Agency. Except as expressly stated otherwise, nothing in this Agreement shall create an agency, partnership or joint venture of any kind between the parties.\\n\\n11.9 No Third-Party Beneficiaries.\\nSource: 30-pl\\nContent: (b) if Google believes, in good faith, that the Distributor has violated or caused Google to violate any Anti-Bribery Laws (as defined in Clause 8.5) or that such a violation is reasonably likely to occur,\\nSource: 4-pl\\n=========\\nFINAL ANSWER: This Agreement is governed by English law.\\nSOURCES: 28-pl\\n\\nQUESTION: What did the president say about Michael Jackson?\\n=========\\nContent: Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. 
This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \\n\\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland.\\nSource: 0-pl\\nContent: And we won’t stop. \\n\\nWe have lost so much to COVID-19. Time with one another. And worst of all, so much loss of life. \\n\\nLet’s use this moment to reset. Let’s stop looking at COVID-19 as a partisan dividing line and see it for what it is: A God-awful disease. \\n\\nLet’s stop seeing each other as enemies, and start seeing each other for who we really are: Fellow Americans. \\n\\nWe can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. 
\\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves.\\nSource: 24-pl\\nContent: And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards. \\n\\nTo all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \\n\\nAnd I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \\n\\nTonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world. \\n\\nAmerica will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \\n\\nThese steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \\n\\nBut I want you to know that we are going to be okay.\\nSource: 5-pl\\nContent: More support for patients and families. \\n\\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \\n\\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \\n\\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \\n\\nA unity agenda for the nation. \\n\\nWe can do this. \\n\\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \\n\\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. 
\\n\\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \\n\\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \\n\\nNow is the hour. \\n\\nOur moment of responsibility. \\n\\nOur test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \\n\\nWell I know this nation.\\nSource: 34-pl\\n=========\\nFINAL ANSWER: The president did not mention Michael Jackson.\\nSOURCES:\\n\\nQUESTION: what is the price of Tiago iCNG?\\n=========\\nContent: The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\\n\\nContent: Tata Motors on Friday launched the CNG variant of its micro SUV Punch priced between Rs 7.1 lakh and Rs 9.68 lakh (ex-showroom, Delhi).\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\\n\\nContent: Tata Motors launches Punch iCNG, price starts at Rs 7.1 lakh\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\\n\\nContent: None\\nSource: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\\n=========\\nFINAL ANSWER:\"\n",
400
+ " ]\n",
401
+ "}\n"
402
+ ]
403
+ },
404
+ {
405
+ "name": "stdout",
406
+ "output_type": "stream",
407
+ "text": [
408
+ "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain > 10:llm:OpenAI] [2.88s] Exiting LLM run with output:\n",
409
+ "\u001b[0m{\n",
410
+ " \"generations\": [\n",
411
+ " [\n",
412
+ " {\n",
413
+ " \"text\": \" The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\\nSOURCES: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\",\n",
414
+ " \"generation_info\": {\n",
415
+ " \"finish_reason\": \"stop\",\n",
416
+ " \"logprobs\": null\n",
417
+ " }\n",
418
+ " }\n",
419
+ " ]\n",
420
+ " ],\n",
421
+ " \"llm_output\": {\n",
422
+ " \"token_usage\": {\n",
423
+ " \"total_tokens\": 2093,\n",
424
+ " \"prompt_tokens\": 1976,\n",
425
+ " \"completion_tokens\": 117\n",
426
+ " },\n",
427
+ " \"model_name\": \"text-davinci-003\"\n",
428
+ " },\n",
429
+ " \"run\": null\n",
430
+ "}\n",
431
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain > 9:chain:LLMChain] [2.88s] Exiting Chain run with output:\n",
432
+ "\u001b[0m{\n",
433
+ " \"text\": \" The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\\nSOURCES: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\"\n",
434
+ "}\n",
435
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain > 3:chain:MapReduceDocumentsChain] [3.90s] Exiting Chain run with output:\n",
436
+ "\u001b[0m{\n",
437
+ " \"output_text\": \" The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\\nSOURCES: https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\"\n",
438
+ "}\n",
439
+ "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:RetrievalQAWithSourcesChain] [4.09s] Exiting Chain run with output:\n",
440
+ "\u001b[0m{\n",
441
+ " \"answer\": \" The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\\n\",\n",
442
+ " \"sources\": \"https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html\"\n",
443
+ "}\n"
444
+ ]
445
+ },
446
+ {
447
+ "data": {
448
+ "text/plain": [
449
+ "{'answer': ' The Tiago iCNG is priced between Rs 6.55 lakh and Rs 8.1 lakh.\\n',\n",
450
+ " 'sources': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.htmlhttps://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html'}"
451
+ ]
452
+ },
453
+ "execution_count": 26,
454
+ "metadata": {},
455
+ "output_type": "execute_result"
456
+ }
457
+ ],
458
+ "source": [
459
+ "query = \"what is the price of Tiago iCNG?\"\n",
460
+ "# query = \"what are the main features of punch iCNG?\"\n",
461
+ "\n",
462
+ "langchain.debug=True\n",
463
+ "\n",
464
+ "chain({\"question\": query}, return_only_outputs=True)"
465
+ ]
466
+ }
467
+ ],
468
+ "metadata": {
469
+ "kernelspec": {
470
+ "display_name": "Python 3 (ipykernel)",
471
+ "language": "python",
472
+ "name": "python3"
473
+ },
474
+ "language_info": {
475
+ "codemirror_mode": {
476
+ "name": "ipython",
477
+ "version": 3
478
+ },
479
+ "file_extension": ".py",
480
+ "mimetype": "text/x-python",
481
+ "name": "python",
482
+ "nbconvert_exporter": "python",
483
+ "pygments_lexer": "ipython3",
484
+ "version": "3.10.11"
485
+ }
486
+ },
487
+ "nbformat": 4,
488
+ "nbformat_minor": 5
489
+ }
text_loaders_splitters.ipynb ADDED
@@ -0,0 +1,891 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "ae222ed8",
6
+ "metadata": {},
7
+ "source": [
8
+ "## Document Loaders In LangChain"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "e65dada0",
14
+ "metadata": {},
15
+ "source": [
16
+ "#### TextLoader"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 1,
22
+ "id": "470ed0a2",
23
+ "metadata": {
24
+ "scrolled": false
25
+ },
26
+ "outputs": [
27
+ {
28
+ "data": {
29
+ "text/plain": [
30
+ "[Document(page_content=\"The stock of NVIDIA Corp (NASDAQ:NVDA) experienced a daily loss of -3.56% and a 3-month gain of 32.35%. With an Earnings Per Share (EPS) (EPS) of $1.92, the question arises: is the stock significantly overvalued? This article aims to provide a detailed valuation analysis of NVIDIA, offering insights into its financial strength, profitability, growth, and more. We invite you to delve into this comprehensive analysis.\\n\\nCompany Overview\\nWarning! GuruFocus has detected 10 Warning Signs with NVDA. Click here to check it out.\\n\\nNVDA 30-Year Financial Data\\n\\nThe intrinsic value of NVDA\\n\\n\\nNVIDIA Corp (NASDAQ:NVDA) is a leading designer of discrete graphics processing units that enhance the experience on computing platforms. The firm's chips are widely used in various end markets, including PC gaming and data centers. In recent years, NVIDIA has broadened its focus from traditional PC graphics applications such as gaming to more complex and favorable opportunities, including artificial intelligence and autonomous driving, leveraging the high-performance capabilities of its products.\\n\\nCurrently, NVIDIA's stock price stands at $418.01, significantly higher than the GF Value of $310.28, indicating the stock might be overvalued. With a market cap of $1 trillion, the valuation seems steep. The following analysis aims to delve deeper into the company's value.\\n\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nUnderstanding the GF Value\\nThe GF Value is a unique measure of the intrinsic value of a stock, calculated based on historical trading multiples, a GuruFocus adjustment factor, and future business performance estimates. If the stock price is significantly above the GF Value Line, it is overvalued, and its future return is likely to be poor. 
Conversely, if it is significantly below the GF Value Line, its future return will likely be higher.\\n\\nAccording to GuruFocus Value calculation, NVIDIA (NASDAQ:NVDA) appears to be significantly overvalued. The stock's current price of $418.01 per share and the market cap of $1 trillion further strengthen this assumption.\\n\\nGiven that NVIDIA is significantly overvalued, the long-term return of its stock is likely to be much lower than its future business growth.\\n\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nLink: These companies may deliver higher future returns at reduced risk.\\n\\nFinancial Strength of NVIDIA\\nExamining the financial strength of a company is crucial before investing in its stock. Companies with poor financial strength pose a higher risk of permanent loss. NVIDIA's cash-to-debt ratio of 1.27 is worse than 58.04% of companies in the Semiconductors industry. However, NVIDIA's overall financial strength is 8 out of 10, indicating a strong financial position.\\n\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nProfitability and Growth\\nConsistent profitability over the long term reduces the risk for investors. NVIDIA, with its profitability ranking of 10 out of 10, has been profitable for the past 10 years. The company's operating margin of 17.37% ranks better than 76.5% of companies in the Semiconductors industry.\\n\\nHowever, growth is a crucial factor in a company's valuation. 
NVIDIA's growth ranks worse than 52.99% of companies in the Semiconductors industry, with its 3-year average revenue growth rate better than 87.88% of companies in the industry.\\n\\nROIC vs WACC\\nComparing a company's return on invested capital (ROIC) to its weighted average cost of capital (WACC) is an effective way to evaluate its profitability. Over the past 12 months, NVIDIA's ROIC was 20.32 while its WACC was 16.74, suggesting that the company is creating value for its shareholders.\\n\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nIs NVIDIA's Stock Significantly Overvalued? A Comprehensive Valuation Analysis\\nConclusion\\nIn conclusion, NVIDIA (NASDAQ:NVDA) appears to be significantly overvalued. Despite its strong financial condition and profitability, its growth ranks lower than 52.99% of companies in the Semiconductors industry. To learn more about NVIDIA stock, you can check out its 30-Year Financials here.\\n\\nTo find out the high quality companies that may deliver above-average returns, please check out GuruFocus High Quality Low Capex Screener.\\n\\nThis article first appeared on GuruFocus.\", metadata={'source': 'nvda_news_1.txt'})]"
31
+ ]
32
+ },
33
+ "execution_count": 1,
34
+ "metadata": {},
35
+ "output_type": "execute_result"
36
+ }
37
+ ],
38
+ "source": [
39
+ "from langchain.document_loaders import TextLoader\n",
40
+ "\n",
41
+ "loader = TextLoader(\"nvda_news_1.txt\")\n",
42
+ "loader.load()"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": 2,
48
+ "id": "ca5844b4",
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "text/plain": [
54
+ "langchain.document_loaders.text.TextLoader"
55
+ ]
56
+ },
57
+ "execution_count": 2,
58
+ "metadata": {},
59
+ "output_type": "execute_result"
60
+ }
61
+ ],
62
+ "source": [
63
+ "type(loader)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 3,
69
+ "id": "bb95e130",
70
+ "metadata": {},
71
+ "outputs": [
72
+ {
73
+ "data": {
74
+ "text/plain": [
75
+ "'nvda_news_1.txt'"
76
+ ]
77
+ },
78
+ "execution_count": 3,
79
+ "metadata": {},
80
+ "output_type": "execute_result"
81
+ }
82
+ ],
83
+ "source": [
84
+ "loader.file_path"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "markdown",
89
+ "id": "4da25ff7",
90
+ "metadata": {},
91
+ "source": [
92
+ "#### CSVLoader"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": 4,
98
+ "id": "ad67ba6e",
99
+ "metadata": {},
100
+ "outputs": [],
101
+ "source": [
102
+ "from langchain.document_loaders.csv_loader import CSVLoader"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 5,
108
+ "id": "220b3fcb",
109
+ "metadata": {},
110
+ "outputs": [
111
+ {
112
+ "data": {
113
+ "text/plain": [
114
+ "[Document(page_content='movie_id: 101\\ntitle: K.G.F: Chapter 2\\nindustry: Bollywood\\nrelease_year: 2022\\nimdb_rating: 8.4\\nstudio: Hombale Films\\nlanguage_id: 3\\nbudget: 1\\nrevenue: 12.5\\nunit: Billions\\ncurrency: INR', metadata={'source': 'movies.csv', 'row': 0}),\n",
115
+ " Document(page_content='movie_id: 102\\ntitle: Doctor Strange in the Multiverse of Madness\\nindustry: Hollywood\\nrelease_year: 2022\\nimdb_rating: 7\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 200\\nrevenue: 954.8\\nunit: Millions\\ncurrency: USD', metadata={'source': 'movies.csv', 'row': 1}),\n",
116
+ " Document(page_content='movie_id: 103\\ntitle: Thor: The Dark World\\nindustry: Hollywood\\nrelease_year: 2013\\nimdb_rating: 6.8\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 165\\nrevenue: 644.8\\nunit: Millions\\ncurrency: USD', metadata={'source': 'movies.csv', 'row': 2}),\n",
117
+ " Document(page_content='movie_id: 104\\ntitle: Thor: Ragnarok\\nindustry: Hollywood\\nrelease_year: 2017\\nimdb_rating: 7.9\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 180\\nrevenue: 854\\nunit: Millions\\ncurrency: USD', metadata={'source': 'movies.csv', 'row': 3}),\n",
118
+ " Document(page_content='movie_id: 105\\ntitle: Thor: Love and Thunder\\nindustry: Hollywood\\nrelease_year: 2022\\nimdb_rating: 6.8\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 250\\nrevenue: 670\\nunit: Millions\\ncurrency: USD', metadata={'source': 'movies.csv', 'row': 4}),\n",
119
+ " Document(page_content='movie_id: 106\\ntitle: Sholay\\nindustry: Bollywood\\nrelease_year: 1975\\nimdb_rating: 8.1\\nstudio: United Producers\\nlanguage_id: 1\\nbudget: Not Available\\nrevenue: Not Available\\nunit: Not Available\\ncurrency: Not Available', metadata={'source': 'movies.csv', 'row': 5}),\n",
120
+ " Document(page_content='movie_id: 107\\ntitle: Dilwale Dulhania Le Jayenge\\nindustry: Bollywood\\nrelease_year: 1995\\nimdb_rating: 8\\nstudio: Yash Raj Films\\nlanguage_id: 1\\nbudget: 400\\nrevenue: 2000\\nunit: Millions\\ncurrency: INR', metadata={'source': 'movies.csv', 'row': 6}),\n",
121
+ " Document(page_content='movie_id: 108\\ntitle: 3 Idiots\\nindustry: Bollywood\\nrelease_year: 2009\\nimdb_rating: 8.4\\nstudio: Vinod Chopra Films\\nlanguage_id: 1\\nbudget: 550\\nrevenue: 4000\\nunit: Millions\\ncurrency: INR', metadata={'source': 'movies.csv', 'row': 7}),\n",
122
+ " Document(page_content='movie_id: 109\\ntitle: Kabhi Khushi Kabhie Gham\\nindustry: Bollywood\\nrelease_year: 2001\\nimdb_rating: 7.4\\nstudio: Dharma Productions\\nlanguage_id: 1\\nbudget: 390\\nrevenue: 1360\\nunit: Millions\\ncurrency: INR', metadata={'source': 'movies.csv', 'row': 8})]"
123
+ ]
124
+ },
125
+ "execution_count": 5,
126
+ "metadata": {},
127
+ "output_type": "execute_result"
128
+ }
129
+ ],
130
+ "source": [
131
+ "loader = CSVLoader(file_path=\"movies.csv\")\n",
132
+ "data = loader.load()\n",
133
+ "data"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 6,
139
+ "id": "4e1b3b9a",
140
+ "metadata": {
141
+ "scrolled": true
142
+ },
143
+ "outputs": [
144
+ {
145
+ "data": {
146
+ "text/plain": [
147
+ "Document(page_content='movie_id: 101\\ntitle: K.G.F: Chapter 2\\nindustry: Bollywood\\nrelease_year: 2022\\nimdb_rating: 8.4\\nstudio: Hombale Films\\nlanguage_id: 3\\nbudget: 1\\nrevenue: 12.5\\nunit: Billions\\ncurrency: INR', metadata={'source': 'movies.csv', 'row': 0})"
148
+ ]
149
+ },
150
+ "execution_count": 6,
151
+ "metadata": {},
152
+ "output_type": "execute_result"
153
+ }
154
+ ],
155
+ "source": [
156
+ "data[0]"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 7,
162
+ "id": "e356cd2b",
163
+ "metadata": {},
164
+ "outputs": [
165
+ {
166
+ "data": {
167
+ "text/plain": [
168
+ "[Document(page_content='movie_id: 101\\ntitle: K.G.F: Chapter 2\\nindustry: Bollywood\\nrelease_year: 2022\\nimdb_rating: 8.4\\nstudio: Hombale Films\\nlanguage_id: 3\\nbudget: 1\\nrevenue: 12.5\\nunit: Billions\\ncurrency: INR', metadata={'source': 'K.G.F: Chapter 2', 'row': 0}),\n",
169
+ " Document(page_content='movie_id: 102\\ntitle: Doctor Strange in the Multiverse of Madness\\nindustry: Hollywood\\nrelease_year: 2022\\nimdb_rating: 7\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 200\\nrevenue: 954.8\\nunit: Millions\\ncurrency: USD', metadata={'source': 'Doctor Strange in the Multiverse of Madness', 'row': 1}),\n",
170
+ " Document(page_content='movie_id: 103\\ntitle: Thor: The Dark World\\nindustry: Hollywood\\nrelease_year: 2013\\nimdb_rating: 6.8\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 165\\nrevenue: 644.8\\nunit: Millions\\ncurrency: USD', metadata={'source': 'Thor: The Dark World', 'row': 2}),\n",
171
+ " Document(page_content='movie_id: 104\\ntitle: Thor: Ragnarok\\nindustry: Hollywood\\nrelease_year: 2017\\nimdb_rating: 7.9\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 180\\nrevenue: 854\\nunit: Millions\\ncurrency: USD', metadata={'source': 'Thor: Ragnarok', 'row': 3}),\n",
172
+ " Document(page_content='movie_id: 105\\ntitle: Thor: Love and Thunder\\nindustry: Hollywood\\nrelease_year: 2022\\nimdb_rating: 6.8\\nstudio: Marvel Studios\\nlanguage_id: 5\\nbudget: 250\\nrevenue: 670\\nunit: Millions\\ncurrency: USD', metadata={'source': 'Thor: Love and Thunder', 'row': 4}),\n",
173
+ " Document(page_content='movie_id: 106\\ntitle: Sholay\\nindustry: Bollywood\\nrelease_year: 1975\\nimdb_rating: 8.1\\nstudio: United Producers\\nlanguage_id: 1\\nbudget: Not Available\\nrevenue: Not Available\\nunit: Not Available\\ncurrency: Not Available', metadata={'source': 'Sholay', 'row': 5}),\n",
174
+ " Document(page_content='movie_id: 107\\ntitle: Dilwale Dulhania Le Jayenge\\nindustry: Bollywood\\nrelease_year: 1995\\nimdb_rating: 8\\nstudio: Yash Raj Films\\nlanguage_id: 1\\nbudget: 400\\nrevenue: 2000\\nunit: Millions\\ncurrency: INR', metadata={'source': 'Dilwale Dulhania Le Jayenge', 'row': 6}),\n",
175
+ " Document(page_content='movie_id: 108\\ntitle: 3 Idiots\\nindustry: Bollywood\\nrelease_year: 2009\\nimdb_rating: 8.4\\nstudio: Vinod Chopra Films\\nlanguage_id: 1\\nbudget: 550\\nrevenue: 4000\\nunit: Millions\\ncurrency: INR', metadata={'source': '3 Idiots', 'row': 7}),\n",
176
+ " Document(page_content='movie_id: 109\\ntitle: Kabhi Khushi Kabhie Gham\\nindustry: Bollywood\\nrelease_year: 2001\\nimdb_rating: 7.4\\nstudio: Dharma Productions\\nlanguage_id: 1\\nbudget: 390\\nrevenue: 1360\\nunit: Millions\\ncurrency: INR', metadata={'source': 'Kabhi Khushi Kabhie Gham', 'row': 8})]"
177
+ ]
178
+ },
179
+ "execution_count": 7,
180
+ "metadata": {},
181
+ "output_type": "execute_result"
182
+ }
183
+ ],
184
+ "source": [
185
+ "loader = CSVLoader(file_path=\"movies.csv\", source_column=\"title\")\n",
186
+ "data = loader.load()\n",
187
+ "data"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 8,
193
+ "id": "42d13fd6",
194
+ "metadata": {},
195
+ "outputs": [
196
+ {
197
+ "data": {
198
+ "text/plain": [
199
+ "'movie_id: 101\\ntitle: K.G.F: Chapter 2\\nindustry: Bollywood\\nrelease_year: 2022\\nimdb_rating: 8.4\\nstudio: Hombale Films\\nlanguage_id: 3\\nbudget: 1\\nrevenue: 12.5\\nunit: Billions\\ncurrency: INR'"
200
+ ]
201
+ },
202
+ "execution_count": 8,
203
+ "metadata": {},
204
+ "output_type": "execute_result"
205
+ }
206
+ ],
207
+ "source": [
208
+ "data[0].page_content"
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "execution_count": 9,
214
+ "id": "0d8913df",
215
+ "metadata": {},
216
+ "outputs": [
217
+ {
218
+ "data": {
219
+ "text/plain": [
220
+ "{'source': 'K.G.F: Chapter 2', 'row': 0}"
221
+ ]
222
+ },
223
+ "execution_count": 9,
224
+ "metadata": {},
225
+ "output_type": "execute_result"
226
+ }
227
+ ],
228
+ "source": [
229
+ "data[0].metadata"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "markdown",
234
+ "id": "f0163924",
235
+ "metadata": {},
236
+ "source": [
237
+ "#### UnstructuredURLLoader"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "markdown",
242
+ "id": "48644959",
243
+ "metadata": {},
244
+ "source": [
245
+ "LangChain's UnstructuredURLLoader internally uses the unstructured Python library to load content from URLs.\n",
246
+ "\n",
247
+ "https://unstructured-io.github.io/unstructured/introduction.html\n",
248
+ "\n",
249
+ "https://pypi.org/project/unstructured/#description"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": 11,
255
+ "id": "2e1fbf77",
256
+ "metadata": {},
257
+ "outputs": [
258
+ {
259
+ "name": "stdout",
260
+ "output_type": "stream",
261
+ "text": [
262
+ "Requirement already satisfied: unstructured in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (0.9.2)\n",
263
+ "Requirement already satisfied: libmagic in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (1.0)\n",
264
+ "Requirement already satisfied: python-magic in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (0.4.27)\n",
265
+ "Requirement already satisfied: python-magic-bin in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (0.4.14)\n",
266
+ "Requirement already satisfied: tabulate in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from unstructured) (0.9.0)\n",
267
+ "Requirement already satisfied: requests in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from unstructured) (2.31.0)\n",
268
+ "Requirement already satisfied: nltk in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from unstructured) (3.8.1)\n",
269
+ "Requirement already satisfied: chardet in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from unstructured) (5.2.0)\n",
270
+ "Requirement already satisfied: filetype in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from unstructured) (1.2.0)\n",
271
+ "Requirement already satisfied: lxml in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from unstructured) (4.9.3)\n",
272
+ "Requirement already satisfied: click in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from nltk->unstructured) (8.1.3)\n",
273
+ "Requirement already satisfied: tqdm in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from nltk->unstructured) (4.65.0)\n",
274
+ "Requirement already satisfied: regex>=2021.8.3 in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from nltk->unstructured) (2023.8.8)\n",
275
+ "Requirement already satisfied: joblib in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from nltk->unstructured) (1.3.2)\n",
276
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests->unstructured) (2.0.3)\n",
277
+ "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests->unstructured) (2023.5.7)\n",
278
+ "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests->unstructured) (3.4)\n",
279
+ "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from requests->unstructured) (3.1.0)\n",
280
+ "Requirement already satisfied: colorama in c:\\users\\dhava\\appdata\\local\\programs\\python\\python310\\lib\\site-packages (from click->nltk->unstructured) (0.4.6)\n"
281
+ ]
282
+ },
283
+ {
284
+ "name": "stderr",
285
+ "output_type": "stream",
286
+ "text": [
287
+ "\n",
288
+ "[notice] A new release of pip is available: 23.0.1 -> 23.2.1\n",
289
+ "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
290
+ ]
291
+ }
292
+ ],
293
+ "source": [
294
+ "#installing necessary libraries, libmagic is used for file type detection\n",
295
+ "!pip3 install unstructured libmagic python-magic python-magic-bin"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 10,
301
+ "id": "9bfca934",
302
+ "metadata": {},
303
+ "outputs": [],
304
+ "source": [
305
+ "from langchain.document_loaders import UnstructuredURLLoader"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 11,
311
+ "id": "93ce75de",
312
+ "metadata": {},
313
+ "outputs": [],
314
+ "source": [
315
+ "loader = UnstructuredURLLoader(\n",
316
+ " urls = [\n",
317
+ " \"https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html\",\n",
318
+ " \"https://www.moneycontrol.com/news/business/markets/market-corrects-post-rbi-ups-inflation-forecast-icrr-bet-on-these-top-10-rate-sensitive-stocks-ideas-11142611.html\"\n",
319
+ " ]\n",
320
+ ")"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": 12,
326
+ "id": "be914bfa",
327
+ "metadata": {
328
+ "scrolled": false
329
+ },
330
+ "outputs": [
331
+ {
332
+ "data": {
333
+ "text/plain": [
334
+ "2"
335
+ ]
336
+ },
337
+ "execution_count": 12,
338
+ "metadata": {},
339
+ "output_type": "execute_result"
340
+ }
341
+ ],
342
+ "source": [
343
+ "data = loader.load()\n",
344
+ "len(data)"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 13,
350
+ "id": "52838711",
351
+ "metadata": {},
352
+ "outputs": [
353
+ {
354
+ "data": {
355
+ "text/plain": [
356
+ "'English\\n\\nHindi\\n\\nGujarati\\n\\nSpecials\\n\\nTrending Stocks\\n\\nIRFC\\xa0INE053F01010, IRFC, 543257\\n\\nTata Power\\xa0INE'"
357
+ ]
358
+ },
359
+ "execution_count": 13,
360
+ "metadata": {},
361
+ "output_type": "execute_result"
362
+ }
363
+ ],
364
+ "source": [
365
+ "data[0].page_content[0:100]"
366
+ ]
367
+ },
368
+ {
369
+ "cell_type": "code",
370
+ "execution_count": 14,
371
+ "id": "8b587350",
372
+ "metadata": {},
373
+ "outputs": [
374
+ {
375
+ "data": {
376
+ "text/plain": [
377
+ "{'source': 'https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html'}"
378
+ ]
379
+ },
380
+ "execution_count": 14,
381
+ "metadata": {},
382
+ "output_type": "execute_result"
383
+ }
384
+ ],
385
+ "source": [
386
+ "data[0].metadata"
387
+ ]
388
+ },
389
+ {
390
+ "cell_type": "markdown",
391
+ "id": "df45adf4",
392
+ "metadata": {},
393
+ "source": [
394
+ "## Text Splitters"
395
+ ]
396
+ },
397
+ {
398
+ "cell_type": "markdown",
399
+ "id": "7ce2e736",
400
+ "metadata": {},
401
+ "source": [
402
+ "Why do we need text splitters in the first place?\n",
403
+ "\n",
404
+ "LLMs have token limits. Hence we need to split text, which can be large, into small chunks so that each chunk's size is under the token limit. There are various text splitter classes in LangChain that allow us to do this."
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "code",
409
+ "execution_count": 80,
410
+ "id": "90431c5b",
411
+ "metadata": {},
412
+ "outputs": [],
413
+ "source": [
414
+ "# Taking some random text from wikipedia\n",
415
+ "\n",
416
+ "text = \"\"\"Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan. \n",
417
+ "It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine. \n",
418
+ "Set in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for humankind.\n",
419
+ "\n",
420
+ "Brothers Christopher and Jonathan Nolan wrote the screenplay, which had its origins in a script Jonathan developed in 2007 and was originally set to be directed by Steven Spielberg. \n",
421
+ "Kip Thorne, a Caltech theoretical physicist and 2017 Nobel laureate in Physics,[4] was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar. \n",
422
+ "Cinematographer Hoyte van Hoytema shot it on 35 mm movie film in the Panavision anamorphic format and IMAX 70 mm. Principal photography began in late 2013 and took place in Alberta, Iceland, and Los Angeles. \n",
423
+ "Interstellar uses extensive practical and miniature effects, and the company Double Negative created additional digital effects.\n",
424
+ "\n",
425
+ "Interstellar premiered in Los Angeles on October 26, 2014. In the United States, it was first released on film stock, expanding to venues using digital projectors. The film received generally positive reviews from critics and grossed over $677 million worldwide ($715 million after subsequent re-releases), making it the tenth-highest-grossing film of 2014. \n",
426
+ "It has been praised by astronomers for its scientific accuracy and portrayal of theoretical astrophysics.[5][6][7] Interstellar was nominated for five awards at the 87th Academy Awards, winning Best Visual Effects, and received numerous other accolades.\"\"\""
427
+ ]
428
+ },
429
+ {
430
+ "cell_type": "markdown",
431
+ "id": "7af9a95b",
432
+ "metadata": {},
433
+ "source": [
434
+ "#### Manual approach of splitting the text into chunks"
435
+ ]
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "execution_count": 81,
440
+ "id": "a51fc70e",
441
+ "metadata": {},
442
+ "outputs": [
443
+ {
444
+ "data": {
445
+ "text/plain": [
446
+ "'Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher N'"
447
+ ]
448
+ },
449
+ "execution_count": 81,
450
+ "metadata": {},
451
+ "output_type": "execute_result"
452
+ }
453
+ ],
454
+ "source": [
455
+ "# Say the LLM token limit is 100 (treating characters as a stand-in for tokens); in that case we can do a simple thing such as this\n",
456
+ "\n",
457
+ "text[0:100]"
458
+ ]
459
+ },
460
+ {
461
+ "cell_type": "code",
462
+ "execution_count": 82,
463
+ "id": "2adae99a",
464
+ "metadata": {},
465
+ "outputs": [
466
+ {
467
+ "data": {
468
+ "text/plain": [
469
+ "264"
470
+ ]
471
+ },
472
+ "execution_count": 82,
473
+ "metadata": {},
474
+ "output_type": "execute_result"
475
+ }
476
+ ],
477
+ "source": [
478
+ "# But we want complete words, and we want to do this for the entire text; maybe we can use Python's split function\n",
479
+ "\n",
480
+ "words = text.split(\" \")\n",
481
+ "len(words)"
482
+ ]
483
+ },
484
+ {
485
+ "cell_type": "code",
486
+ "execution_count": 83,
487
+ "id": "56ec5613",
488
+ "metadata": {},
489
+ "outputs": [],
490
+ "source": [
491
+ "chunks = []\n",
492
+ "\n",
493
+ "s = \"\"\n",
494
+ "for word in words:\n",
495
+ " s += word + \" \"\n",
496
+ " if len(s)>200:\n",
497
+ " chunks.append(s)\n",
498
+ " s = \"\"\n",
499
+ " \n",
500
+ "chunks.append(s)"
501
+ ]
502
+ },
503
+ {
504
+ "cell_type": "code",
505
+ "execution_count": 84,
506
+ "id": "95d902bd",
507
+ "metadata": {
508
+ "scrolled": true
509
+ },
510
+ "outputs": [
511
+ {
512
+ "data": {
513
+ "text/plain": [
514
+ "['Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan. \\nIt stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt ',\n",
515
+ " 'Damon, and Michael Caine. \\nSet in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in ']"
516
+ ]
517
+ },
518
+ "execution_count": 84,
519
+ "metadata": {},
520
+ "output_type": "execute_result"
521
+ }
522
+ ],
523
+ "source": [
524
+ "chunks[:2]"
525
+ ]
526
+ },
527
+ {
528
+ "cell_type": "markdown",
529
+ "id": "ff06ebc4",
530
+ "metadata": {},
531
+ "source": [
532
+ "**Splitting data into chunks can be done in native Python, but it is a tedious process. Also, if necessary, you may need to experiment with various delimiters in an iterative manner to ensure that each chunk does not exceed the token length limit of the respective LLM.**\n",
533
+ "\n",
534
+ "**Langchain provides a better way through text splitter classes.**"
535
+ ]
536
+ },
537
+ {
538
+ "cell_type": "markdown",
539
+ "id": "f64b2909",
540
+ "metadata": {},
541
+ "source": [
542
+ "#### Using Text Splitter Classes from Langchain\n",
543
+ "\n",
544
+ "#### CharacterTextSplitter"
545
+ ]
546
+ },
547
+ {
548
+ "cell_type": "code",
549
+ "execution_count": 85,
550
+ "id": "9505bedd",
551
+ "metadata": {},
552
+ "outputs": [],
553
+ "source": [
554
+ "from langchain.text_splitter import CharacterTextSplitter\n",
555
+ "\n",
556
+ "splitter = CharacterTextSplitter(\n",
557
+ " separator = \"\\n\",\n",
558
+ " chunk_size=200,\n",
559
+ " chunk_overlap=0\n",
560
+ ")"
561
+ ]
562
+ },
563
+ {
564
+ "cell_type": "code",
565
+ "execution_count": 86,
566
+ "id": "4d86bc63",
567
+ "metadata": {},
568
+ "outputs": [
569
+ {
570
+ "name": "stderr",
571
+ "output_type": "stream",
572
+ "text": [
573
+ "Created a chunk of size 210, which is longer than the specified 200\n",
574
+ "Created a chunk of size 208, which is longer than the specified 200\n",
575
+ "Created a chunk of size 358, which is longer than the specified 200\n"
576
+ ]
577
+ },
578
+ {
579
+ "data": {
580
+ "text/plain": [
581
+ "9"
582
+ ]
583
+ },
584
+ "execution_count": 86,
585
+ "metadata": {},
586
+ "output_type": "execute_result"
587
+ }
588
+ ],
589
+ "source": [
590
+ "chunks = splitter.split_text(text)\n",
591
+ "len(chunks)"
592
+ ]
593
+ },
594
+ {
595
+ "cell_type": "code",
596
+ "execution_count": 87,
597
+ "id": "e027b9de",
598
+ "metadata": {},
599
+ "outputs": [
600
+ {
601
+ "name": "stdout",
602
+ "output_type": "stream",
603
+ "text": [
604
+ "105\n",
605
+ "120\n",
606
+ "210\n",
607
+ "181\n",
608
+ "197\n",
609
+ "207\n",
610
+ "128\n",
611
+ "357\n",
612
+ "253\n"
613
+ ]
614
+ }
615
+ ],
616
+ "source": [
617
+ "for chunk in chunks:\n",
618
+ " print(len(chunk))"
619
+ ]
620
+ },
621
+ {
622
+ "cell_type": "markdown",
623
+ "id": "dcc73da3",
624
+ "metadata": {},
625
+ "source": [
626
+ "As you can see, although we gave 200 as the chunk size, the split was based on \n, so it ended up creating chunks that are bigger than 200. "
627
+ ]
628
+ },
629
+ {
630
+ "cell_type": "markdown",
631
+ "id": "f86f3a10",
632
+ "metadata": {},
633
+ "source": [
634
+ "Another class from LangChain can be used to recursively split the text based on a list of separators. This class is RecursiveCharacterTextSplitter. Let's see how it works."
635
+ ]
636
+ },
637
+ {
638
+ "cell_type": "markdown",
639
+ "id": "00a61cf1",
640
+ "metadata": {},
641
+ "source": [
642
+ "#### RecursiveCharacterTextSplitter"
643
+ ]
644
+ },
645
+ {
646
+ "cell_type": "code",
647
+ "execution_count": 88,
648
+ "id": "dacf5e8b",
649
+ "metadata": {},
650
+ "outputs": [
651
+ {
652
+ "data": {
653
+ "text/plain": [
654
+ "'Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan. \\nIt stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine. \\nSet in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for humankind.\\n\\nBrothers Christopher and Jonathan Nolan wrote the screenplay, which had its origins in a script Jonathan developed in 2007 and was originally set to be directed by Steven Spielberg. \\nKip Thorne, a Caltech theoretical physicist and 2017 Nobel laureate in Physics,[4] was an executive producer, acted as a scientific consultant, and wrote a tie-in book, The Science of Interstellar. \\nCinematographer Hoyte van Hoytema shot it on 35 mm movie film in the Panavision anamorphic format and IMAX 70 mm. Principal photography began in late 2013 and took place in Alberta, Iceland, and Los Angeles. \\nInterstellar uses extensive practical and miniature effects, and the company Double Negative created additional digital effects.\\n\\nInterstellar premiered in Los Angeles on October 26, 2014. In the United States, it was first released on film stock, expanding to venues using digital projectors. The film received generally positive reviews from critics and grossed over $677 million worldwide ($715 million after subsequent re-releases), making it the tenth-highest-grossing film of 2014. \\nIt has been praised by astronomers for its scientific accuracy and portrayal of theoretical astrophysics.[5][6][7] Interstellar was nominated for five awards at the 87th Academy Awards, winning Best Visual Effects, and received numerous other accolades.'"
655
+ ]
656
+ },
657
+ "execution_count": 88,
658
+ "metadata": {},
659
+ "output_type": "execute_result"
660
+ }
661
+ ],
662
+ "source": [
663
+ "text"
664
+ ]
665
+ },
666
+ {
667
+ "cell_type": "code",
668
+ "execution_count": 117,
669
+ "id": "848eae34",
670
+ "metadata": {},
671
+ "outputs": [],
672
+ "source": [
673
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
674
+ "\n",
675
+ "r_splitter = RecursiveCharacterTextSplitter(\n",
676
+ " separators = [\"\\n\\n\", \"\\n\", \" \"], # List of separators, tried in order (defaults to [\"\\n\\n\", \"\\n\", \" \", \"\"])\n",
677
+ " chunk_size = 200, # maximum size of each chunk created (measured with length_function)\n",
678
+ " chunk_overlap = 0, # size of overlap between chunks in order to maintain the context\n",
679
+ " length_function = len # Function used to measure chunk size; here we use \"len\" (the length of the string), but you can pass any token counter\n",
680
+ ")"
681
+ ]
682
+ },
683
+ {
684
+ "cell_type": "code",
685
+ "execution_count": 118,
686
+ "id": "1151c51d",
687
+ "metadata": {},
688
+ "outputs": [
689
+ {
690
+ "name": "stdout",
691
+ "output_type": "stream",
692
+ "text": [
693
+ "105\n",
694
+ "120\n",
695
+ "199\n",
696
+ "10\n",
697
+ "181\n",
698
+ "197\n",
699
+ "198\n",
700
+ "8\n",
701
+ "128\n",
702
+ "191\n",
703
+ "165\n",
704
+ "198\n",
705
+ "54\n"
706
+ ]
707
+ }
708
+ ],
709
+ "source": [
710
+ "chunks = r_splitter.split_text(text)\n",
711
+ "\n",
712
+ "for chunk in chunks:\n",
713
+ " print(len(chunk))"
714
+ ]
715
+ },
716
+ {
717
+ "cell_type": "markdown",
718
+ "id": "32135f4d",
719
+ "metadata": {},
720
+ "source": [
721
+ "**Let's understand how exactly it formed these chunks**"
722
+ ]
723
+ },
724
+ {
725
+ "cell_type": "code",
726
+ "execution_count": 109,
727
+ "id": "57ef6974",
728
+ "metadata": {},
729
+ "outputs": [
730
+ {
731
+ "data": {
732
+ "text/plain": [
733
+ "'Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan. \\nIt stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine. \\nSet in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for humankind.'"
734
+ ]
735
+ },
736
+ "execution_count": 109,
737
+ "metadata": {},
738
+ "output_type": "execute_result"
739
+ }
740
+ ],
741
+ "source": [
742
+ "first_split = text.split(\"\\n\\n\")[0]\n",
743
+ "first_split"
744
+ ]
745
+ },
746
+ {
747
+ "cell_type": "code",
748
+ "execution_count": 110,
749
+ "id": "2bc7719f",
750
+ "metadata": {},
751
+ "outputs": [
752
+ {
753
+ "data": {
754
+ "text/plain": [
755
+ "439"
756
+ ]
757
+ },
758
+ "execution_count": 110,
759
+ "metadata": {},
760
+ "output_type": "execute_result"
761
+ }
762
+ ],
763
+ "source": [
764
+ "len(first_split)"
765
+ ]
766
+ },
767
+ {
768
+ "cell_type": "markdown",
769
+ "id": "e1018d24",
770
+ "metadata": {},
771
+ "source": [
772
+ "Recursive text splitter uses a list of separators; in our case separators = [\"\\n\\n\", \"\\n\", \" \"]\n",
773
+ "\n",
774
+ "So now it will first split using \\n\\n and then if the resulting chunk size is greater than the chunk_size parameter which is 200\n",
775
+ "in our case, then it will use the next separator which is \\n"
776
+ ]
777
+ },
778
+ {
779
+ "cell_type": "code",
780
+ "execution_count": 119,
781
+ "id": "739cef71",
782
+ "metadata": {},
783
+ "outputs": [
784
+ {
785
+ "data": {
786
+ "text/plain": [
787
+ "['Interstellar is a 2014 epic science fiction film co-written, directed, and produced by Christopher Nolan. ',\n",
788
+ " 'It stars Matthew McConaughey, Anne Hathaway, Jessica Chastain, Bill Irwin, Ellen Burstyn, Matt Damon, and Michael Caine. ',\n",
789
+ " 'Set in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for humankind.']"
790
+ ]
791
+ },
792
+ "execution_count": 119,
793
+ "metadata": {},
794
+ "output_type": "execute_result"
795
+ }
796
+ ],
797
+ "source": [
798
+ "second_split = first_split.split(\"\\n\")\n",
799
+ "second_split"
800
+ ]
801
+ },
802
+ {
803
+ "cell_type": "code",
804
+ "execution_count": 120,
805
+ "id": "903f5921",
806
+ "metadata": {},
807
+ "outputs": [
808
+ {
809
+ "name": "stdout",
810
+ "output_type": "stream",
811
+ "text": [
812
+ "106\n",
813
+ "121\n",
814
+ "210\n"
815
+ ]
816
+ }
817
+ ],
818
+ "source": [
819
+ "for split in second_split:\n",
820
+ " print(len(split))"
821
+ ]
822
+ },
823
+ {
824
+ "cell_type": "markdown",
825
+ "id": "ed1e69a7",
826
+ "metadata": {},
827
+ "source": [
828
+ "Third split exceeds chunk size 200. Now it will further try to split that using the third separator which is ' ' (space)"
829
+ ]
830
+ },
831
+ {
832
+ "cell_type": "code",
833
+ "execution_count": 115,
834
+ "id": "69f4da9d",
835
+ "metadata": {},
836
+ "outputs": [
837
+ {
838
+ "data": {
839
+ "text/plain": [
840
+ "'Set in a dystopian future where humanity is embroiled in a catastrophic blight and famine, the film follows a group of astronauts who travel through a wormhole near Saturn in search of a new home for humankind.'"
841
+ ]
842
+ },
843
+ "execution_count": 115,
844
+ "metadata": {},
845
+ "output_type": "execute_result"
846
+ }
847
+ ],
848
+ "source": [
849
+ "second_split[2]"
850
+ ]
851
+ },
852
+ {
853
+ "cell_type": "markdown",
854
+ "id": "7fda659a",
855
+ "metadata": {},
856
+ "source": [
857
+ "When you split this using space (i.e. second_split[2].split(\" \")), it will separate out each word and then it will merge those \n",
858
+ "chunks so that each merged chunk is as large as possible without exceeding the chunk_size of 200 (here: 199 and 10 characters)"
859
+ ]
860
+ },
861
+ {
862
+ "cell_type": "markdown",
863
+ "id": "6f538e50",
864
+ "metadata": {},
865
+ "source": [
866
+ "<img src=\"chunk_size.jpg\"/>"
867
+ ]
868
+ }
869
+ ],
870
+ "metadata": {
871
+ "kernelspec": {
872
+ "display_name": "Python 3 (ipykernel)",
873
+ "language": "python",
874
+ "name": "python3"
875
+ },
876
+ "language_info": {
877
+ "codemirror_mode": {
878
+ "name": "ipython",
879
+ "version": 3
880
+ },
881
+ "file_extension": ".py",
882
+ "mimetype": "text/x-python",
883
+ "name": "python",
884
+ "nbconvert_exporter": "python",
885
+ "pygments_lexer": "ipython3",
886
+ "version": "3.10.11"
887
+ }
888
+ },
889
+ "nbformat": 4,
890
+ "nbformat_minor": 5
891
+ }
vector_index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cdd74f971a1f9d3e73e47ac09fad6d1c27711fa92c9b2393a387838f405d033
3
+ size 291235