rajivmehtadata committed on
Commit
a1521a0
1 Parent(s): e842b33

update in code.

Browse files
app.py CHANGED
@@ -5,21 +5,32 @@ try:
5
  import langchain
6
  from langchain import Cohere
7
  import os
8
- from langchain.document_loaders import PagedPDFSplitter
9
  from langchain.embeddings.cohere import CohereEmbeddings
10
  from langchain.vectorstores import Chroma
11
  from langchain.text_splitter import CharacterTextSplitter
12
  from langchain.llms import Cohere
 
 
13
  except Exception as ex:
14
  print(f'Some Error --> {ex}')
15
 
16
- doclist=[doc for doc in [docs for docs in list(map(lambda pdffile : PagedPDFSplitter(pdffile).load_and_split(),list(map(lambda fileref:os.path.abspath(f'data/{fileref}'),os.listdir('./data')))))]]
17
- flat_list = [item for sublist in doclist for item in sublist]
18
- text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
19
- docs=text_splitter.split_documents(flat_list)
20
 
21
  embeddings = CohereEmbeddings()
22
- docsearch = Chroma.from_documents(docs, embeddings, metadatas=[{"source": str(i)} for i in range(len(docs))])
 
 
 
 
 
 
 
 
 
 
 
 
23
  print(f'Done with loading and Indexing')
24
 
25
  def procdata(name):
 
5
  import langchain
6
  from langchain import Cohere
7
  import os
 
8
  from langchain.embeddings.cohere import CohereEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.text_splitter import CharacterTextSplitter
11
  from langchain.llms import Cohere
12
+ from langchain.chains import ChatVectorDBChain
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
  except Exception as ex:
15
  print(f'Some Error --> {ex}')
16
 
17
+ with open('kb.txt') as f:
18
+ state_of_the_union = f.read()
 
 
19
 
20
  embeddings = CohereEmbeddings()
21
+
22
+ text_splitter = RecursiveCharacterTextSplitter(
23
+ chunk_size = 100,
24
+ chunk_overlap = 20,
25
+ length_function = len,
26
+ )
27
+
28
+ texts = text_splitter.create_documents([state_of_the_union])
29
+
30
+ docsearch = Chroma.from_documents(texts, embeddings)
31
+
32
+ llm=Cohere(model="command-xlarge-nightly",temperature=0.7,max_tokens=1000)
33
+
34
  print(f'Done with loading and Indexing')
35
 
36
  def procdata(name):
data/Chapter-5.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:07bb2f993d156d4e111ded9faab34b814844a52a16989bbd291848046607be18
3
- size 771590
 
 
 
 
data/Chapter-6.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebe4deac8a76b3657e3ceb6a8a5e996a9482084d8c4c6d318d746355aa79522e
3
- size 982810
 
 
 
 
data/Chapter-7.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1075e582ba6d05791d7a13ad9954e730d451c5ac0b49c4f2ca32b9cd98d1f0dc
3
- size 782149
 
 
 
 
data/Chapter-8.pdf DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:43237c26adf1a84b1f0a3ee5d03930d5a1703ca8a458ca5bfb2a5b8dad43a4d2
3
- size 1724135
 
 
 
 
data/sample.txt DELETED
@@ -1 +0,0 @@
1
- dummy data
 
 
kb.txt ADDED
The diff for this file is too large to render. See raw diff