abishek-official committed on
Commit
e7b5526
1 Parent(s): f661324

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
- from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.memory import ConversationBufferMemory
@@ -18,15 +18,10 @@ def get_pdf_text(pdf_docs):
18
  return text
19
 
20
 
21
- def get_text_chunks(text):
22
- text_splitter = CharacterTextSplitter(
23
- separator="\n",
24
- chunk_size=1000,
25
- chunk_overlap=200,
26
- length_function=len
27
- )
28
- chunks = text_splitter.split_documents(text)
29
- return chunks
30
 
31
 
32
  def get_vectorstore(text_chunks):
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain.embeddings import HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.memory import ConversationBufferMemory
 
18
  return text
19
 
20
 
21
+ def get_text_chunks(documents):
22
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
23
+ text_chunks = text_splitter.split_documents(documents)
24
+ return text_chunks
 
 
 
 
 
25
 
26
 
27
  def get_vectorstore(text_chunks):