File size: 56,333 Bytes
ed4d993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use LangChain, GPT and Activeloop's Deep Lake to work with code base\n",
    "In this tutorial, we are going to use Langchain + Activeloop's Deep Lake with GPT to analyze the code base of the LangChain itself. "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Design"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. Prepare data:\n",
    "   1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n",
    "   2. Split all documents to chunks using the `langchain_text_splitters.CharacterTextSplitter`.\n",
    "   3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n",
    "2. Question-Answering:\n",
    "   1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
    "   2. Prepare questions.\n",
    "   3. Get answers running the chain.\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Implementation"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "### Integration preparations"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We need to set up keys for external services and install necessary python libraries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#!python3 -m pip install --upgrade langchain deeplake openai"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Set up OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. \n",
    "\n",
    "For full documentation of Deep Lake please follow https://docs.activeloop.ai/ and API reference https://docs.deeplake.ai/en/latest/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from getpass import getpass\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = getpass()\n",
    "# Please manually enter OpenAI Key"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the platform at [app.activeloop.ai](https://app.activeloop.ai)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "activeloop_token = getpass(\"Activeloop Token:\")\n",
    "os.environ[\"ACTIVELOOP_TOKEN\"] = activeloop_token"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare data "
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load all repository files. Here we assume this notebook is downloaded as the part of the langchain fork and we work with the python files of the `langchain` repo.\n",
    "\n",
    "If you want to use files from different repo, change `root_dir` to the root dir of your repo."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CITATION.cff  MIGRATE.md  README.md  libs\t  poetry.toml\n",
      "LICENSE       Makefile\t  docs\t     poetry.lock  pyproject.toml\n"
     ]
    }
   ],
   "source": [
    "!ls \"../../../../../../libs\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2554\n"
     ]
    }
   ],
   "source": [
    "from langchain_community.document_loaders import TextLoader\n",
    "\n",
    "root_dir = \"../../../../../../libs\"\n",
    "\n",
    "docs = []\n",
    "for dirpath, dirnames, filenames in os.walk(root_dir):\n",
    "    for file in filenames:\n",
    "        if file.endswith(\".py\") and \"*venv/\" not in dirpath:\n",
    "            try:\n",
    "                loader = TextLoader(os.path.join(dirpath, file), encoding=\"utf-8\")\n",
    "                docs.extend(loader.load_and_split())\n",
    "            except Exception:\n",
    "                pass\n",
    "print(f\"{len(docs)}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then, chunk the files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Created a chunk of size 1010, which is longer than the specified 1000\n",
      "Created a chunk of size 3466, which is longer than the specified 1000\n",
      "Created a chunk of size 1375, which is longer than the specified 1000\n",
      "Created a chunk of size 1928, which is longer than the specified 1000\n",
      "Created a chunk of size 1075, which is longer than the specified 1000\n",
      "Created a chunk of size 1063, which is longer than the specified 1000\n",
      "Created a chunk of size 1083, which is longer than the specified 1000\n",
      "Created a chunk of size 1074, which is longer than the specified 1000\n",
      "Created a chunk of size 1591, which is longer than the specified 1000\n",
      "Created a chunk of size 2300, which is longer than the specified 1000\n",
      "Created a chunk of size 1040, which is longer than the specified 1000\n",
      "Created a chunk of size 1018, which is longer than the specified 1000\n",
      "Created a chunk of size 2787, which is longer than the specified 1000\n",
      "Created a chunk of size 1018, which is longer than the specified 1000\n",
      "Created a chunk of size 2311, which is longer than the specified 1000\n",
      "Created a chunk of size 2811, which is longer than the specified 1000\n",
      "Created a chunk of size 1186, which is longer than the specified 1000\n",
      "Created a chunk of size 1497, which is longer than the specified 1000\n",
      "Created a chunk of size 1043, which is longer than the specified 1000\n",
      "Created a chunk of size 1020, which is longer than the specified 1000\n",
      "Created a chunk of size 1232, which is longer than the specified 1000\n",
      "Created a chunk of size 1334, which is longer than the specified 1000\n",
      "Created a chunk of size 1221, which is longer than the specified 1000\n",
      "Created a chunk of size 2229, which is longer than the specified 1000\n",
      "Created a chunk of size 1027, which is longer than the specified 1000\n",
      "Created a chunk of size 1361, which is longer than the specified 1000\n",
      "Created a chunk of size 1057, which is longer than the specified 1000\n",
      "Created a chunk of size 1204, which is longer than the specified 1000\n",
      "Created a chunk of size 1420, which is longer than the specified 1000\n",
      "Created a chunk of size 1298, which is longer than the specified 1000\n",
      "Created a chunk of size 1062, which is longer than the specified 1000\n",
      "Created a chunk of size 1008, which is longer than the specified 1000\n",
      "Created a chunk of size 1025, which is longer than the specified 1000\n",
      "Created a chunk of size 1206, which is longer than the specified 1000\n",
      "Created a chunk of size 1202, which is longer than the specified 1000\n",
      "Created a chunk of size 1206, which is longer than the specified 1000\n",
      "Created a chunk of size 1272, which is longer than the specified 1000\n",
      "Created a chunk of size 1092, which is longer than the specified 1000\n",
      "Created a chunk of size 1303, which is longer than the specified 1000\n",
      "Created a chunk of size 1029, which is longer than the specified 1000\n",
      "Created a chunk of size 1117, which is longer than the specified 1000\n",
      "Created a chunk of size 1438, which is longer than the specified 1000\n",
      "Created a chunk of size 3055, which is longer than the specified 1000\n",
      "Created a chunk of size 1628, which is longer than the specified 1000\n",
      "Created a chunk of size 1566, which is longer than the specified 1000\n",
      "Created a chunk of size 1179, which is longer than the specified 1000\n",
      "Created a chunk of size 1006, which is longer than the specified 1000\n",
      "Created a chunk of size 1213, which is longer than the specified 1000\n",
      "Created a chunk of size 2461, which is longer than the specified 1000\n",
      "Created a chunk of size 1849, which is longer than the specified 1000\n",
      "Created a chunk of size 1398, which is longer than the specified 1000\n",
      "Created a chunk of size 1469, which is longer than the specified 1000\n",
      "Created a chunk of size 1220, which is longer than the specified 1000\n",
      "Created a chunk of size 1048, which is longer than the specified 1000\n",
      "Created a chunk of size 1040, which is longer than the specified 1000\n",
      "Created a chunk of size 1052, which is longer than the specified 1000\n",
      "Created a chunk of size 1052, which is longer than the specified 1000\n",
      "Created a chunk of size 1304, which is longer than the specified 1000\n",
      "Created a chunk of size 1147, which is longer than the specified 1000\n",
      "Created a chunk of size 1236, which is longer than the specified 1000\n",
      "Created a chunk of size 1411, which is longer than the specified 1000\n",
      "Created a chunk of size 1181, which is longer than the specified 1000\n",
      "Created a chunk of size 1357, which is longer than the specified 1000\n",
      "Created a chunk of size 1706, which is longer than the specified 1000\n",
      "Created a chunk of size 1099, which is longer than the specified 1000\n",
      "Created a chunk of size 1221, which is longer than the specified 1000\n",
      "Created a chunk of size 1066, which is longer than the specified 1000\n",
      "Created a chunk of size 1223, which is longer than the specified 1000\n",
      "Created a chunk of size 1202, which is longer than the specified 1000\n",
      "Created a chunk of size 2806, which is longer than the specified 1000\n",
      "Created a chunk of size 1180, which is longer than the specified 1000\n",
      "Created a chunk of size 1338, which is longer than the specified 1000\n",
      "Created a chunk of size 1074, which is longer than the specified 1000\n",
      "Created a chunk of size 1025, which is longer than the specified 1000\n",
      "Created a chunk of size 1017, which is longer than the specified 1000\n",
      "Created a chunk of size 1497, which is longer than the specified 1000\n",
      "Created a chunk of size 1151, which is longer than the specified 1000\n",
      "Created a chunk of size 1287, which is longer than the specified 1000\n",
      "Created a chunk of size 1359, which is longer than the specified 1000\n",
      "Created a chunk of size 1075, which is longer than the specified 1000\n",
      "Created a chunk of size 1037, which is longer than the specified 1000\n",
      "Created a chunk of size 1080, which is longer than the specified 1000\n",
      "Created a chunk of size 1354, which is longer than the specified 1000\n",
      "Created a chunk of size 1033, which is longer than the specified 1000\n",
      "Created a chunk of size 1473, which is longer than the specified 1000\n",
      "Created a chunk of size 1074, which is longer than the specified 1000\n",
      "Created a chunk of size 2091, which is longer than the specified 1000\n",
      "Created a chunk of size 1388, which is longer than the specified 1000\n",
      "Created a chunk of size 1040, which is longer than the specified 1000\n",
      "Created a chunk of size 1040, which is longer than the specified 1000\n",
      "Created a chunk of size 1158, which is longer than the specified 1000\n",
      "Created a chunk of size 1683, which is longer than the specified 1000\n",
      "Created a chunk of size 2424, which is longer than the specified 1000\n",
      "Created a chunk of size 1877, which is longer than the specified 1000\n",
      "Created a chunk of size 1002, which is longer than the specified 1000\n",
      "Created a chunk of size 2175, which is longer than the specified 1000\n",
      "Created a chunk of size 1011, which is longer than the specified 1000\n",
      "Created a chunk of size 1915, which is longer than the specified 1000\n",
      "Created a chunk of size 1587, which is longer than the specified 1000\n",
      "Created a chunk of size 1969, which is longer than the specified 1000\n",
      "Created a chunk of size 1687, which is longer than the specified 1000\n",
      "Created a chunk of size 1732, which is longer than the specified 1000\n",
      "Created a chunk of size 1322, which is longer than the specified 1000\n",
      "Created a chunk of size 1339, which is longer than the specified 1000\n",
      "Created a chunk of size 3083, which is longer than the specified 1000\n",
      "Created a chunk of size 2148, which is longer than the specified 1000\n",
      "Created a chunk of size 1647, which is longer than the specified 1000\n",
      "Created a chunk of size 1698, which is longer than the specified 1000\n",
      "Created a chunk of size 1012, which is longer than the specified 1000\n",
      "Created a chunk of size 1919, which is longer than the specified 1000\n",
      "Created a chunk of size 1676, which is longer than the specified 1000\n",
      "Created a chunk of size 1581, which is longer than the specified 1000\n",
      "Created a chunk of size 2559, which is longer than the specified 1000\n",
      "Created a chunk of size 1247, which is longer than the specified 1000\n",
      "Created a chunk of size 1220, which is longer than the specified 1000\n",
      "Created a chunk of size 1768, which is longer than the specified 1000\n",
      "Created a chunk of size 1287, which is longer than the specified 1000\n",
      "Created a chunk of size 1300, which is longer than the specified 1000\n",
      "Created a chunk of size 1390, which is longer than the specified 1000\n",
      "Created a chunk of size 1423, which is longer than the specified 1000\n",
      "Created a chunk of size 1018, which is longer than the specified 1000\n",
      "Created a chunk of size 1185, which is longer than the specified 1000\n",
      "Created a chunk of size 2858, which is longer than the specified 1000\n",
      "Created a chunk of size 1149, which is longer than the specified 1000\n",
      "Created a chunk of size 1730, which is longer than the specified 1000\n",
      "Created a chunk of size 1026, which is longer than the specified 1000\n",
      "Created a chunk of size 1913, which is longer than the specified 1000\n",
      "Created a chunk of size 1362, which is longer than the specified 1000\n",
      "Created a chunk of size 1324, which is longer than the specified 1000\n",
      "Created a chunk of size 1073, which is longer than the specified 1000\n",
      "Created a chunk of size 1455, which is longer than the specified 1000\n",
      "Created a chunk of size 1621, which is longer than the specified 1000\n",
      "Created a chunk of size 1516, which is longer than the specified 1000\n",
      "Created a chunk of size 1633, which is longer than the specified 1000\n",
      "Created a chunk of size 1620, which is longer than the specified 1000\n",
      "Created a chunk of size 1856, which is longer than the specified 1000\n",
      "Created a chunk of size 1562, which is longer than the specified 1000\n",
      "Created a chunk of size 1729, which is longer than the specified 1000\n",
      "Created a chunk of size 1203, which is longer than the specified 1000\n",
      "Created a chunk of size 1307, which is longer than the specified 1000\n",
      "Created a chunk of size 1331, which is longer than the specified 1000\n",
      "Created a chunk of size 1295, which is longer than the specified 1000\n",
      "Created a chunk of size 1101, which is longer than the specified 1000\n",
      "Created a chunk of size 1090, which is longer than the specified 1000\n",
      "Created a chunk of size 1241, which is longer than the specified 1000\n",
      "Created a chunk of size 1138, which is longer than the specified 1000\n",
      "Created a chunk of size 1076, which is longer than the specified 1000\n",
      "Created a chunk of size 1210, which is longer than the specified 1000\n",
      "Created a chunk of size 1183, which is longer than the specified 1000\n",
      "Created a chunk of size 1353, which is longer than the specified 1000\n",
      "Created a chunk of size 1271, which is longer than the specified 1000\n",
      "Created a chunk of size 1778, which is longer than the specified 1000\n",
      "Created a chunk of size 1141, which is longer than the specified 1000\n",
      "Created a chunk of size 1099, which is longer than the specified 1000\n",
      "Created a chunk of size 2090, which is longer than the specified 1000\n",
      "Created a chunk of size 1056, which is longer than the specified 1000\n",
      "Created a chunk of size 1120, which is longer than the specified 1000\n",
      "Created a chunk of size 1048, which is longer than the specified 1000\n",
      "Created a chunk of size 1072, which is longer than the specified 1000\n",
      "Created a chunk of size 1367, which is longer than the specified 1000\n",
      "Created a chunk of size 1246, which is longer than the specified 1000\n",
      "Created a chunk of size 1766, which is longer than the specified 1000\n",
      "Created a chunk of size 1105, which is longer than the specified 1000\n",
      "Created a chunk of size 1400, which is longer than the specified 1000\n",
      "Created a chunk of size 1488, which is longer than the specified 1000\n",
      "Created a chunk of size 1672, which is longer than the specified 1000\n",
      "Created a chunk of size 1137, which is longer than the specified 1000\n",
      "Created a chunk of size 1500, which is longer than the specified 1000\n",
      "Created a chunk of size 1224, which is longer than the specified 1000\n",
      "Created a chunk of size 1414, which is longer than the specified 1000\n",
      "Created a chunk of size 1242, which is longer than the specified 1000\n",
      "Created a chunk of size 1551, which is longer than the specified 1000\n",
      "Created a chunk of size 1268, which is longer than the specified 1000\n",
      "Created a chunk of size 1130, which is longer than the specified 1000\n",
      "Created a chunk of size 2023, which is longer than the specified 1000\n",
      "Created a chunk of size 1878, which is longer than the specified 1000\n",
      "Created a chunk of size 1364, which is longer than the specified 1000\n",
      "Created a chunk of size 1212, which is longer than the specified 1000\n",
      "Created a chunk of size 1792, which is longer than the specified 1000\n",
      "Created a chunk of size 1055, which is longer than the specified 1000\n",
      "Created a chunk of size 1496, which is longer than the specified 1000\n",
      "Created a chunk of size 1045, which is longer than the specified 1000\n",
      "Created a chunk of size 1501, which is longer than the specified 1000\n",
      "Created a chunk of size 1208, which is longer than the specified 1000\n",
      "Created a chunk of size 1356, which is longer than the specified 1000\n",
      "Created a chunk of size 1351, which is longer than the specified 1000\n",
      "Created a chunk of size 1130, which is longer than the specified 1000\n",
      "Created a chunk of size 1133, which is longer than the specified 1000\n",
      "Created a chunk of size 1381, which is longer than the specified 1000\n",
      "Created a chunk of size 1120, which is longer than the specified 1000\n",
      "Created a chunk of size 1200, which is longer than the specified 1000\n",
      "Created a chunk of size 1202, which is longer than the specified 1000\n",
      "Created a chunk of size 1149, which is longer than the specified 1000\n",
      "Created a chunk of size 1196, which is longer than the specified 1000\n",
      "Created a chunk of size 3173, which is longer than the specified 1000\n",
      "Created a chunk of size 1106, which is longer than the specified 1000\n",
      "Created a chunk of size 1211, which is longer than the specified 1000\n",
      "Created a chunk of size 1530, which is longer than the specified 1000\n",
      "Created a chunk of size 1471, which is longer than the specified 1000\n",
      "Created a chunk of size 1353, which is longer than the specified 1000\n",
      "Created a chunk of size 1279, which is longer than the specified 1000\n",
      "Created a chunk of size 1101, which is longer than the specified 1000\n",
      "Created a chunk of size 1123, which is longer than the specified 1000\n",
      "Created a chunk of size 1848, which is longer than the specified 1000\n",
      "Created a chunk of size 1197, which is longer than the specified 1000\n",
      "Created a chunk of size 1235, which is longer than the specified 1000\n",
      "Created a chunk of size 1314, which is longer than the specified 1000\n",
      "Created a chunk of size 1043, which is longer than the specified 1000\n",
      "Created a chunk of size 1183, which is longer than the specified 1000\n",
      "Created a chunk of size 1182, which is longer than the specified 1000\n",
      "Created a chunk of size 1269, which is longer than the specified 1000\n",
      "Created a chunk of size 1416, which is longer than the specified 1000\n",
      "Created a chunk of size 1462, which is longer than the specified 1000\n",
      "Created a chunk of size 1120, which is longer than the specified 1000\n",
      "Created a chunk of size 1033, which is longer than the specified 1000\n",
      "Created a chunk of size 1143, which is longer than the specified 1000\n",
      "Created a chunk of size 1537, which is longer than the specified 1000\n",
      "Created a chunk of size 1381, which is longer than the specified 1000\n",
      "Created a chunk of size 2286, which is longer than the specified 1000\n",
      "Created a chunk of size 1175, which is longer than the specified 1000\n",
      "Created a chunk of size 1187, which is longer than the specified 1000\n",
      "Created a chunk of size 1494, which is longer than the specified 1000\n",
      "Created a chunk of size 1597, which is longer than the specified 1000\n",
      "Created a chunk of size 1203, which is longer than the specified 1000\n",
      "Created a chunk of size 1058, which is longer than the specified 1000\n",
      "Created a chunk of size 1261, which is longer than the specified 1000\n",
      "Created a chunk of size 1189, which is longer than the specified 1000\n",
      "Created a chunk of size 1388, which is longer than the specified 1000\n",
      "Created a chunk of size 1224, which is longer than the specified 1000\n",
      "Created a chunk of size 1226, which is longer than the specified 1000\n",
      "Created a chunk of size 1289, which is longer than the specified 1000\n",
      "Created a chunk of size 1157, which is longer than the specified 1000\n",
      "Created a chunk of size 1095, which is longer than the specified 1000\n",
      "Created a chunk of size 2196, which is longer than the specified 1000\n",
      "Created a chunk of size 1029, which is longer than the specified 1000\n",
      "Created a chunk of size 1077, which is longer than the specified 1000\n",
      "Created a chunk of size 1848, which is longer than the specified 1000\n",
      "Created a chunk of size 1095, which is longer than the specified 1000\n",
      "Created a chunk of size 1418, which is longer than the specified 1000\n",
      "Created a chunk of size 1069, which is longer than the specified 1000\n",
      "Created a chunk of size 2573, which is longer than the specified 1000\n",
      "Created a chunk of size 1512, which is longer than the specified 1000\n",
      "Created a chunk of size 1046, which is longer than the specified 1000\n",
      "Created a chunk of size 1792, which is longer than the specified 1000\n",
      "Created a chunk of size 1042, which is longer than the specified 1000\n",
      "Created a chunk of size 1125, which is longer than the specified 1000\n",
      "Created a chunk of size 1165, which is longer than the specified 1000\n",
      "Created a chunk of size 1030, which is longer than the specified 1000\n",
      "Created a chunk of size 1484, which is longer than the specified 1000\n",
      "Created a chunk of size 2796, which is longer than the specified 1000\n",
      "Created a chunk of size 1026, which is longer than the specified 1000\n",
      "Created a chunk of size 1726, which is longer than the specified 1000\n",
      "Created a chunk of size 1628, which is longer than the specified 1000\n",
      "Created a chunk of size 1881, which is longer than the specified 1000\n",
      "Created a chunk of size 1441, which is longer than the specified 1000\n",
      "Created a chunk of size 1175, which is longer than the specified 1000\n",
      "Created a chunk of size 1360, which is longer than the specified 1000\n",
      "Created a chunk of size 1210, which is longer than the specified 1000\n",
      "Created a chunk of size 1425, which is longer than the specified 1000\n",
      "Created a chunk of size 1560, which is longer than the specified 1000\n",
      "Created a chunk of size 1131, which is longer than the specified 1000\n",
      "Created a chunk of size 1276, which is longer than the specified 1000\n",
      "Created a chunk of size 1068, which is longer than the specified 1000\n",
      "Created a chunk of size 1494, which is longer than the specified 1000\n",
      "Created a chunk of size 1246, which is longer than the specified 1000\n",
      "Created a chunk of size 2621, which is longer than the specified 1000\n",
      "Created a chunk of size 1264, which is longer than the specified 1000\n",
      "Created a chunk of size 1166, which is longer than the specified 1000\n",
      "Created a chunk of size 1332, which is longer than the specified 1000\n",
      "Created a chunk of size 3499, which is longer than the specified 1000\n",
      "Created a chunk of size 1651, which is longer than the specified 1000\n",
      "Created a chunk of size 1794, which is longer than the specified 1000\n",
      "Created a chunk of size 2162, which is longer than the specified 1000\n",
      "Created a chunk of size 1061, which is longer than the specified 1000\n",
      "Created a chunk of size 1083, which is longer than the specified 1000\n",
      "Created a chunk of size 1018, which is longer than the specified 1000\n",
      "Created a chunk of size 1751, which is longer than the specified 1000\n",
      "Created a chunk of size 1301, which is longer than the specified 1000\n",
      "Created a chunk of size 1025, which is longer than the specified 1000\n",
      "Created a chunk of size 1489, which is longer than the specified 1000\n",
      "Created a chunk of size 1481, which is longer than the specified 1000\n",
      "Created a chunk of size 1505, which is longer than the specified 1000\n",
      "Created a chunk of size 1497, which is longer than the specified 1000\n",
      "Created a chunk of size 1505, which is longer than the specified 1000\n",
      "Created a chunk of size 1282, which is longer than the specified 1000\n",
      "Created a chunk of size 1224, which is longer than the specified 1000\n",
      "Created a chunk of size 1261, which is longer than the specified 1000\n",
      "Created a chunk of size 1123, which is longer than the specified 1000\n",
      "Created a chunk of size 1137, which is longer than the specified 1000\n",
      "Created a chunk of size 2183, which is longer than the specified 1000\n",
      "Created a chunk of size 1039, which is longer than the specified 1000\n",
      "Created a chunk of size 1135, which is longer than the specified 1000\n",
      "Created a chunk of size 1254, which is longer than the specified 1000\n",
      "Created a chunk of size 1234, which is longer than the specified 1000\n",
      "Created a chunk of size 1111, which is longer than the specified 1000\n",
      "Created a chunk of size 1135, which is longer than the specified 1000\n",
      "Created a chunk of size 2023, which is longer than the specified 1000\n",
      "Created a chunk of size 1216, which is longer than the specified 1000\n",
      "Created a chunk of size 1013, which is longer than the specified 1000\n",
      "Created a chunk of size 1152, which is longer than the specified 1000\n",
      "Created a chunk of size 1087, which is longer than the specified 1000\n",
      "Created a chunk of size 1040, which is longer than the specified 1000\n",
      "Created a chunk of size 1330, which is longer than the specified 1000\n",
      "Created a chunk of size 2342, which is longer than the specified 1000\n",
      "Created a chunk of size 1940, which is longer than the specified 1000\n",
      "Created a chunk of size 1621, which is longer than the specified 1000\n",
      "Created a chunk of size 2169, which is longer than the specified 1000\n",
      "Created a chunk of size 1824, which is longer than the specified 1000\n",
      "Created a chunk of size 1554, which is longer than the specified 1000\n",
      "Created a chunk of size 1457, which is longer than the specified 1000\n",
      "Created a chunk of size 1486, which is longer than the specified 1000\n",
      "Created a chunk of size 1556, which is longer than the specified 1000\n",
      "Created a chunk of size 1012, which is longer than the specified 1000\n",
      "Created a chunk of size 1484, which is longer than the specified 1000\n",
      "Created a chunk of size 1039, which is longer than the specified 1000\n",
      "Created a chunk of size 1335, which is longer than the specified 1000\n",
      "Created a chunk of size 1684, which is longer than the specified 1000\n",
      "Created a chunk of size 1537, which is longer than the specified 1000\n",
      "Created a chunk of size 1136, which is longer than the specified 1000\n",
      "Created a chunk of size 1219, which is longer than the specified 1000\n",
      "Created a chunk of size 1011, which is longer than the specified 1000\n",
      "Created a chunk of size 1055, which is longer than the specified 1000\n",
      "Created a chunk of size 1433, which is longer than the specified 1000\n",
      "Created a chunk of size 1263, which is longer than the specified 1000\n",
      "Created a chunk of size 1014, which is longer than the specified 1000\n",
      "Created a chunk of size 1107, which is longer than the specified 1000\n",
      "Created a chunk of size 2702, which is longer than the specified 1000\n",
      "Created a chunk of size 1237, which is longer than the specified 1000\n",
      "Created a chunk of size 1172, which is longer than the specified 1000\n",
      "Created a chunk of size 1517, which is longer than the specified 1000\n",
      "Created a chunk of size 1589, which is longer than the specified 1000\n",
      "Created a chunk of size 1681, which is longer than the specified 1000\n",
      "Created a chunk of size 2244, which is longer than the specified 1000\n",
      "Created a chunk of size 1505, which is longer than the specified 1000\n",
      "Created a chunk of size 1228, which is longer than the specified 1000\n",
      "Created a chunk of size 1801, which is longer than the specified 1000\n",
      "Created a chunk of size 1856, which is longer than the specified 1000\n",
      "Created a chunk of size 2171, which is longer than the specified 1000\n",
      "Created a chunk of size 2450, which is longer than the specified 1000\n",
      "Created a chunk of size 1110, which is longer than the specified 1000\n",
      "Created a chunk of size 1148, which is longer than the specified 1000\n",
      "Created a chunk of size 1050, which is longer than the specified 1000\n",
      "Created a chunk of size 1014, which is longer than the specified 1000\n",
      "Created a chunk of size 1458, which is longer than the specified 1000\n",
      "Created a chunk of size 1270, which is longer than the specified 1000\n",
      "Created a chunk of size 1287, which is longer than the specified 1000\n",
      "Created a chunk of size 1127, which is longer than the specified 1000\n",
      "Created a chunk of size 1576, which is longer than the specified 1000\n",
      "Created a chunk of size 1350, which is longer than the specified 1000\n",
      "Created a chunk of size 2283, which is longer than the specified 1000\n",
      "Created a chunk of size 2211, which is longer than the specified 1000\n",
      "Created a chunk of size 1167, which is longer than the specified 1000\n",
      "Created a chunk of size 1038, which is longer than the specified 1000\n",
      "Created a chunk of size 1117, which is longer than the specified 1000\n",
      "Created a chunk of size 1160, which is longer than the specified 1000\n",
      "Created a chunk of size 1163, which is longer than the specified 1000\n",
      "Created a chunk of size 1013, which is longer than the specified 1000\n",
      "Created a chunk of size 1226, which is longer than the specified 1000\n",
      "Created a chunk of size 1336, which is longer than the specified 1000\n",
      "Created a chunk of size 1012, which is longer than the specified 1000\n",
      "Created a chunk of size 2833, which is longer than the specified 1000\n",
      "Created a chunk of size 1201, which is longer than the specified 1000\n",
      "Created a chunk of size 1172, which is longer than the specified 1000\n",
      "Created a chunk of size 1438, which is longer than the specified 1000\n",
      "Created a chunk of size 1259, which is longer than the specified 1000\n",
      "Created a chunk of size 1452, which is longer than the specified 1000\n",
      "Created a chunk of size 1377, which is longer than the specified 1000\n",
      "Created a chunk of size 1001, which is longer than the specified 1000\n",
      "Created a chunk of size 1240, which is longer than the specified 1000\n",
      "Created a chunk of size 1142, which is longer than the specified 1000\n",
      "Created a chunk of size 1338, which is longer than the specified 1000\n",
      "Created a chunk of size 1057, which is longer than the specified 1000\n",
      "Created a chunk of size 1040, which is longer than the specified 1000\n",
      "Created a chunk of size 1579, which is longer than the specified 1000\n",
      "Created a chunk of size 1176, which is longer than the specified 1000\n",
      "Created a chunk of size 1081, which is longer than the specified 1000\n",
      "Created a chunk of size 1751, which is longer than the specified 1000\n",
      "Created a chunk of size 1064, which is longer than the specified 1000\n",
      "Created a chunk of size 1029, which is longer than the specified 1000\n",
      "Created a chunk of size 1937, which is longer than the specified 1000\n",
      "Created a chunk of size 1972, which is longer than the specified 1000\n",
      "Created a chunk of size 1417, which is longer than the specified 1000\n",
      "Created a chunk of size 1203, which is longer than the specified 1000\n",
      "Created a chunk of size 1314, which is longer than the specified 1000\n",
      "Created a chunk of size 1088, which is longer than the specified 1000\n",
      "Created a chunk of size 1455, which is longer than the specified 1000\n",
      "Created a chunk of size 1467, which is longer than the specified 1000\n",
      "Created a chunk of size 1476, which is longer than the specified 1000\n",
      "Created a chunk of size 1354, which is longer than the specified 1000\n",
      "Created a chunk of size 1403, which is longer than the specified 1000\n",
      "Created a chunk of size 1366, which is longer than the specified 1000\n",
      "Created a chunk of size 1112, which is longer than the specified 1000\n",
      "Created a chunk of size 1512, which is longer than the specified 1000\n",
      "Created a chunk of size 1262, which is longer than the specified 1000\n",
      "Created a chunk of size 1405, which is longer than the specified 1000\n",
      "Created a chunk of size 2221, which is longer than the specified 1000\n",
      "Created a chunk of size 1128, which is longer than the specified 1000\n",
      "Created a chunk of size 1021, which is longer than the specified 1000\n",
      "Created a chunk of size 1532, which is longer than the specified 1000\n",
      "Created a chunk of size 1535, which is longer than the specified 1000\n",
      "Created a chunk of size 1230, which is longer than the specified 1000\n",
      "Created a chunk of size 2456, which is longer than the specified 1000\n",
      "Created a chunk of size 1047, which is longer than the specified 1000\n",
      "Created a chunk of size 1320, which is longer than the specified 1000\n",
      "Created a chunk of size 1144, which is longer than the specified 1000\n",
      "Created a chunk of size 1509, which is longer than the specified 1000\n",
      "Created a chunk of size 1003, which is longer than the specified 1000\n",
      "Created a chunk of size 1025, which is longer than the specified 1000\n",
      "Created a chunk of size 1197, which is longer than the specified 1000\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8244\n"
     ]
    }
   ],
   "source": [
    "from langchain_text_splitters import CharacterTextSplitter\n",
    "\n",
    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
    "texts = text_splitter.split_documents(docs)\n",
    "print(f\"{len(texts)}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then embed chunks and upload them to the DeepLake.\n",
    "\n",
    "This can take several minutes. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base='', openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='', openai_organization='', allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={})"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_openai import OpenAIEmbeddings\n",
    "\n",
    "embeddings = OpenAIEmbeddings()\n",
    "embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Your Deep Lake dataset has been successfully created!\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset(path='hub://adilkhan/langchain-code', tensors=['embedding', 'id', 'metadata', 'text'])\n",
      "\n",
      "  tensor      htype       shape       dtype  compression\n",
      "  -------    -------     -------     -------  ------- \n",
      " embedding  embedding  (8244, 1536)  float32   None   \n",
      "    id        text      (8244, 1)      str     None   \n",
      " metadata     json      (8244, 1)      str     None   \n",
      "   text       text      (8244, 1)      str     None   \n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": []
    },
    {
     "data": {
      "text/plain": [
       "<langchain_community.vectorstores.deeplake.DeepLake at 0x7fe1b67d7a30>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain_community.vectorstores import DeepLake\n",
    "\n",
    "username = \"<USERNAME_OR_ORG>\"\n",
    "\n",
    "\n",
    "db = DeepLake.from_documents(\n",
    "    texts, embeddings, dataset_path=f\"hub://{username}/langchain-code\", overwrite=True\n",
    ")\n",
    "db"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`Optional`: You can also use Deep Lake's Managed Tensor Database as a hosting service and run queries there. In order to do so, it is necessary to specify the runtime parameter as {'tensor_db': True} during the creation of the vector store. This configuration enables the execution of queries on the Managed Tensor Database, rather than on the client side. It should be noted that this functionality is not applicable to datasets stored locally or in-memory. In the event that a vector store has already been created outside of the Managed Tensor Database, it is possible to transfer it to the Managed Tensor Database by following the prescribed steps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from langchain_community.vectorstores import DeepLake\n",
    "\n",
    "# db = DeepLake.from_documents(\n",
    "#     texts, embeddings, dataset_path=f\"hub://{<org_id>}/langchain-code\", runtime={\"tensor_db\": True}\n",
    "# )\n",
    "# db"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Question Answering\n",
    "First load the dataset, construct the retriever, then construct the Conversational Chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Deep Lake Dataset in hub://adilkhan/langchain-code already exists, loading from the storage\n"
     ]
    }
   ],
   "source": [
    "db = DeepLake(\n",
    "    dataset_path=f\"hub://{username}/langchain-code\",\n",
    "    read_only=True,\n",
    "    embedding=embeddings,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "retriever = db.as_retriever()\n",
    "retriever.search_kwargs[\"distance_metric\"] = \"cos\"\n",
    "retriever.search_kwargs[\"fetch_k\"] = 20\n",
    "retriever.search_kwargs[\"maximal_marginal_relevance\"] = True\n",
    "retriever.search_kwargs[\"k\"] = 20"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can also specify user defined functions using [Deep Lake filters](https://docs.deeplake.ai/en/latest/deeplake.core.dataset.html#deeplake.core.dataset.Dataset.filter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def filter(x):\n",
    "    # filter based on source code\n",
    "    if \"something\" in x[\"text\"].data()[\"value\"]:\n",
    "        return False\n",
    "\n",
    "    # filter based on path e.g. extension\n",
    "    metadata = x[\"metadata\"].data()[\"value\"]\n",
    "    return \"only_this\" in metadata[\"source\"] or \"also_that\" in metadata[\"source\"]\n",
    "\n",
    "\n",
    "### turn on below for custom filtering\n",
    "# retriever.search_kwargs['filter'] = filter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain.chains import ConversationalRetrievalChain\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "model = ChatOpenAI(\n",
    "    model_name=\"gpt-3.5-turbo-0613\"\n",
    ")  # 'ada' 'gpt-3.5-turbo-0613' 'gpt-4',\n",
    "qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-> **Question**: What is the class hierarchy? \n",
      "\n",
      "**Answer**: The class hierarchy for Memory is as follows:\n",
      "\n",
      "    BaseMemory --> BaseChatMemory --> <name>Memory  # Examples: ZepMemory, MotorheadMemory\n",
      "\n",
      "The class hierarchy for ChatMessageHistory is as follows:\n",
      "\n",
      "    BaseChatMessageHistory --> <name>ChatMessageHistory  # Example: ZepChatMessageHistory\n",
      "\n",
      "The class hierarchy for Prompt is as follows:\n",
      "\n",
      "    BasePromptTemplate --> PipelinePromptTemplate\n",
      "                           StringPromptTemplate --> PromptTemplate\n",
      "                                                    FewShotPromptTemplate\n",
      "                                                    FewShotPromptWithTemplates\n",
      "                           BaseChatPromptTemplate --> AutoGPTPrompt\n",
      "                                                      ChatPromptTemplate --> AgentScratchPadChatPromptTemplate\n",
      " \n",
      "\n",
      "-> **Question**: What classes are derived from the Chain class? \n",
      "\n",
      "**Answer**: The classes derived from the Chain class are:\n",
      "\n",
      "- APIChain\n",
      "- OpenAPIEndpointChain\n",
      "- AnalyzeDocumentChain\n",
      "- MapReduceDocumentsChain\n",
      "- MapRerankDocumentsChain\n",
      "- ReduceDocumentsChain\n",
      "- RefineDocumentsChain\n",
      "- StuffDocumentsChain\n",
      "- ConstitutionalChain\n",
      "- ConversationChain\n",
      "- ChatVectorDBChain\n",
      "- ConversationalRetrievalChain\n",
      "- FalkorDBQAChain\n",
      "- FlareChain\n",
      "- ArangoGraphQAChain\n",
      "- GraphQAChain\n",
      "- GraphCypherQAChain\n",
      "- HugeGraphQAChain\n",
      "- KuzuQAChain\n",
      "- NebulaGraphQAChain\n",
      "- NeptuneOpenCypherQAChain\n",
      "- GraphSparqlQAChain\n",
      "- HypotheticalDocumentEmbedder\n",
      "- LLMChain\n",
      "- LLMBashChain\n",
      "- LLMCheckerChain\n",
      "- LLMMathChain\n",
      "- LLMRequestsChain\n",
      "- LLMSummarizationCheckerChain\n",
      "- MapReduceChain\n",
      "- OpenAIModerationChain\n",
      "- NatBotChain\n",
      "- QAGenerationChain\n",
      "- QAWithSourcesChain\n",
      "- RetrievalQAWithSourcesChain\n",
      "- VectorDBQAWithSourcesChain\n",
      "- RetrievalQA\n",
      "- VectorDBQA\n",
      "- LLMRouterChain\n",
      "- MultiPromptChain\n",
      "- MultiRetrievalQAChain\n",
      "- MultiRouteChain\n",
      "- RouterChain\n",
      "- SequentialChain\n",
      "- SimpleSequentialChain\n",
      "- TransformChain\n",
      "- TaskPlaningChain\n",
      "- QueryChain\n",
      "- CPALChain\n",
      " \n",
      "\n",
      "-> **Question**: What kind of retrievers does LangChain have? \n",
      "\n",
      "**Answer**: The LangChain class includes various types of retrievers such as:\n",
      "\n",
      "- ArxivRetriever\n",
      "- AzureAISearchRetriever\n",
      "- BM25Retriever\n",
      "- ChaindeskRetriever\n",
      "- ChatGPTPluginRetriever\n",
      "- ContextualCompressionRetriever\n",
      "- DocArrayRetriever\n",
      "- ElasticSearchBM25Retriever\n",
      "- EnsembleRetriever\n",
      "- GoogleVertexAISearchRetriever\n",
      "- AmazonKendraRetriever\n",
      "- KNNRetriever\n",
      "- LlamaIndexGraphRetriever and LlamaIndexRetriever\n",
      "- MergerRetriever\n",
      "- MetalRetriever\n",
      "- MilvusRetriever\n",
      "- MultiQueryRetriever\n",
      "- ParentDocumentRetriever\n",
      "- PineconeHybridSearchRetriever\n",
      "- PubMedRetriever\n",
      "- RePhraseQueryRetriever\n",
      "- RemoteLangChainRetriever\n",
      "- SelfQueryRetriever\n",
      "- SVMRetriever\n",
      "- TFIDFRetriever\n",
      "- TimeWeightedVectorStoreRetriever\n",
      "- VespaRetriever\n",
      "- WeaviateHybridSearchRetriever\n",
      "- WebResearchRetriever\n",
      "- WikipediaRetriever\n",
      "- ZepRetriever\n",
      "- ZillizRetriever \n",
      "\n"
     ]
    }
   ],
   "source": [
    "questions = [\n",
    "    \"What is the class hierarchy?\",\n",
    "    \"What classes are derived from the Chain class?\",\n",
    "    \"What kind of retrievers does LangChain have?\",\n",
    "]\n",
    "chat_history = []\n",
    "qa_dict = {}\n",
    "\n",
    "for question in questions:\n",
    "    result = qa({\"question\": question, \"chat_history\": chat_history})\n",
    "    chat_history.append((question, result[\"answer\"]))\n",
    "    qa_dict[question] = result[\"answer\"]\n",
    "    print(f\"-> **Question**: {question} \\n\")\n",
    "    print(f\"**Answer**: {result['answer']} \\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'question': 'LangChain possesses a variety of retrievers including:\\n\\n1. ArxivRetriever\\n2. AzureAISearchRetriever\\n3. BM25Retriever\\n4. ChaindeskRetriever\\n5. ChatGPTPluginRetriever\\n6. ContextualCompressionRetriever\\n7. DocArrayRetriever\\n8. ElasticSearchBM25Retriever\\n9. EnsembleRetriever\\n10. GoogleVertexAISearchRetriever\\n11. AmazonKendraRetriever\\n12. KNNRetriever\\n13. LlamaIndexGraphRetriever\\n14. LlamaIndexRetriever\\n15. MergerRetriever\\n16. MetalRetriever\\n17. MilvusRetriever\\n18. MultiQueryRetriever\\n19. ParentDocumentRetriever\\n20. PineconeHybridSearchRetriever\\n21. PubMedRetriever\\n22. RePhraseQueryRetriever\\n23. RemoteLangChainRetriever\\n24. SelfQueryRetriever\\n25. SVMRetriever\\n26. TFIDFRetriever\\n27. TimeWeightedVectorStoreRetriever\\n28. VespaRetriever\\n29. WeaviateHybridSearchRetriever\\n30. WebResearchRetriever\\n31. WikipediaRetriever\\n32. ZepRetriever\\n33. ZillizRetriever\\n\\nIt also includes self query translators like:\\n\\n1. ChromaTranslator\\n2. DeepLakeTranslator\\n3. MyScaleTranslator\\n4. PineconeTranslator\\n5. QdrantTranslator\\n6. WeaviateTranslator\\n\\nAnd remote retrievers like:\\n\\n1. RemoteLangChainRetriever'}"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "qa_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The class hierarchy for Memory is as follows:\n",
      "\n",
      "    BaseMemory --> BaseChatMemory --> <name>Memory  # Examples: ZepMemory, MotorheadMemory\n",
      "\n",
      "The class hierarchy for ChatMessageHistory is as follows:\n",
      "\n",
      "    BaseChatMessageHistory --> <name>ChatMessageHistory  # Example: ZepChatMessageHistory\n",
      "\n",
      "The class hierarchy for Prompt is as follows:\n",
      "\n",
      "    BasePromptTemplate --> PipelinePromptTemplate\n",
      "                           StringPromptTemplate --> PromptTemplate\n",
      "                                                    FewShotPromptTemplate\n",
      "                                                    FewShotPromptWithTemplates\n",
      "                           BaseChatPromptTemplate --> AutoGPTPrompt\n",
      "                                                      ChatPromptTemplate --> AgentScratchPadChatPromptTemplate\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(qa_dict[\"What is the class hierarchy?\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The classes derived from the Chain class are:\n",
      "\n",
      "- APIChain\n",
      "- OpenAPIEndpointChain\n",
      "- AnalyzeDocumentChain\n",
      "- MapReduceDocumentsChain\n",
      "- MapRerankDocumentsChain\n",
      "- ReduceDocumentsChain\n",
      "- RefineDocumentsChain\n",
      "- StuffDocumentsChain\n",
      "- ConstitutionalChain\n",
      "- ConversationChain\n",
      "- ChatVectorDBChain\n",
      "- ConversationalRetrievalChain\n",
      "- FlareChain\n",
      "- ArangoGraphQAChain\n",
      "- GraphQAChain\n",
      "- GraphCypherQAChain\n",
      "- HugeGraphQAChain\n",
      "- KuzuQAChain\n",
      "- NebulaGraphQAChain\n",
      "- NeptuneOpenCypherQAChain\n",
      "- GraphSparqlQAChain\n",
      "- HypotheticalDocumentEmbedder\n",
      "- LLMChain\n",
      "- LLMBashChain\n",
      "- LLMCheckerChain\n",
      "- LLMMathChain\n",
      "- LLMRequestsChain\n",
      "- LLMSummarizationCheckerChain\n",
      "- MapReduceChain\n",
      "- OpenAIModerationChain\n",
      "- NatBotChain\n",
      "- QAGenerationChain\n",
      "- QAWithSourcesChain\n",
      "- RetrievalQAWithSourcesChain\n",
      "- VectorDBQAWithSourcesChain\n",
      "- RetrievalQA\n",
      "- VectorDBQA\n",
      "- LLMRouterChain\n",
      "- MultiPromptChain\n",
      "- MultiRetrievalQAChain\n",
      "- MultiRouteChain\n",
      "- RouterChain\n",
      "- SequentialChain\n",
      "- SimpleSequentialChain\n",
      "- TransformChain\n",
      "- TaskPlaningChain\n",
      "- QueryChain\n",
      "- CPALChain\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(qa_dict[\"What classes are derived from the Chain class?\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The LangChain class includes various types of retrievers such as:\n",
      "\n",
      "- ArxivRetriever\n",
      "- AzureAISearchRetriever\n",
      "- BM25Retriever\n",
      "- ChaindeskRetriever\n",
      "- ChatGPTPluginRetriever\n",
      "- ContextualCompressionRetriever\n",
      "- DocArrayRetriever\n",
      "- ElasticSearchBM25Retriever\n",
      "- EnsembleRetriever\n",
      "- GoogleVertexAISearchRetriever\n",
      "- AmazonKendraRetriever\n",
      "- KNNRetriever\n",
      "- LlamaIndexGraphRetriever and LlamaIndexRetriever\n",
      "- MergerRetriever\n",
      "- MetalRetriever\n",
      "- MilvusRetriever\n",
      "- MultiQueryRetriever\n",
      "- ParentDocumentRetriever\n",
      "- PineconeHybridSearchRetriever\n",
      "- PubMedRetriever\n",
      "- RePhraseQueryRetriever\n",
      "- RemoteLangChainRetriever\n",
      "- SelfQueryRetriever\n",
      "- SVMRetriever\n",
      "- TFIDFRetriever\n",
      "- TimeWeightedVectorStoreRetriever\n",
      "- VespaRetriever\n",
      "- WeaviateHybridSearchRetriever\n",
      "- WebResearchRetriever\n",
      "- WikipediaRetriever\n",
      "- ZepRetriever\n",
      "- ZillizRetriever\n"
     ]
    }
   ],
   "source": [
    "print(qa_dict[\"What kind of retrievers does LangChain have?\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}