chatbytes committed on
Commit
e8b031a
·
verified ·
1 Parent(s): d78dbfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  # from langchain_community.llms import GooglePalm
3
  # from langchain_community.embeddings import HuggingFaceInstructEmbeddings
4
- # from langchain.text_splitter import CharacterTextSplitter
5
  # from langchain_community.embeddings import GooglePalmEmbeddings
6
  # from langchain_community.vectorstores import FAISS
7
  # from langchain_community.document_loaders import PyPDFLoader
@@ -14,15 +14,15 @@ import PyPDF2
14
  # history.append((user_input, bot_response))
15
  # return history, history
16
 
17
- # def text_splitter_function(text):
18
- # text_splitter = CharacterTextSplitter(
19
- # separator = '\n',
20
- # chunk_size = 1000,
21
- # chunk_overlap = 40,
22
- # length_function = len,
23
- # )
24
- # texts = text_splitter.split_text(text)
25
- # return texts;
26
 
27
  def text_extract(file):
28
  pdf_reader = PyPDF2.PdfReader(file.name)
@@ -33,7 +33,7 @@ def text_extract(file):
33
  for page_num in range(num_pages):
34
  page = pdf_reader.pages[page_num]
35
  text += page.extract_text()
36
- # text_splitter=text_splitter_function(text);
37
  # db = FAISS.from_texts(text_splitter, embeddings);
38
  # retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
39
  # llm=GooglePalm(google_api_key=google_api)
@@ -41,7 +41,7 @@ def text_extract(file):
41
  # llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
42
  # )
43
  # print(db)
44
- return text
45
 
46
 
47
  with gr.Blocks() as demo:
 
1
  import gradio as gr
2
  # from langchain_community.llms import GooglePalm
3
  # from langchain_community.embeddings import HuggingFaceInstructEmbeddings
4
+ from langchain.text_splitter import CharacterTextSplitter
5
  # from langchain_community.embeddings import GooglePalmEmbeddings
6
  # from langchain_community.vectorstores import FAISS
7
  # from langchain_community.document_loaders import PyPDFLoader
 
14
  # history.append((user_input, bot_response))
15
  # return history, history
16
 
17
+ def text_splitter_function(text):
18
+ text_splitter = CharacterTextSplitter(
19
+ separator = '\n',
20
+ chunk_size = 1000,
21
+ chunk_overlap = 40,
22
+ length_function = len,
23
+ )
24
+ texts = text_splitter.split_text(text)
25
+ return texts;
26
 
27
  def text_extract(file):
28
  pdf_reader = PyPDF2.PdfReader(file.name)
 
33
  for page_num in range(num_pages):
34
  page = pdf_reader.pages[page_num]
35
  text += page.extract_text()
36
+ text_splitter=text_splitter_function(text);
37
  # db = FAISS.from_texts(text_splitter, embeddings);
38
  # retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
39
  # llm=GooglePalm(google_api_key=google_api)
 
41
  # llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
42
  # )
43
  # print(db)
44
+ return text_splitter
45
 
46
 
47
  with gr.Blocks() as demo: