Samarth991 committed on
Commit
90fc7ac
1 Parent(s): 3edae51

adding online PDF loader

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -28,7 +28,7 @@ def get_openai_chat_model(API_key):
28
 
29
  def process_documents(documents,data_chunk=1000,chunk_overlap=50):
30
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap)
31
- texts = text_splitter.split_documents(documents[0])
32
  return texts
33
 
34
  def get_hugging_face_model(model_id,API_key,temperature=0.1):
@@ -56,6 +56,7 @@ def document_loader(file_path,api_key,doc_type='pdf',llm='Huggingface'):
56
  elif doc_type == 'word':
57
  document = process_word_document(document_file=file_path)
58
  if document:
 
59
  texts = process_documents(documents=document)
60
  vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
61
  global qa
@@ -91,7 +92,7 @@ def process_word_document(document_file):
91
 
92
  def process_pdf_document(document_file):
93
  print("Document File Name :",document_file.name)
94
- loader = OnlinePDFLoader(document_file.name)
95
  document = loader.load()[0]
96
  return document
97
 
 
28
 
29
  def process_documents(documents,data_chunk=1000,chunk_overlap=50):
30
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=data_chunk, chunk_overlap=chunk_overlap)
31
+ texts = text_splitter.split_documents(documents)
32
  return texts
33
 
34
  def get_hugging_face_model(model_id,API_key,temperature=0.1):
 
56
  elif doc_type == 'word':
57
  document = process_word_document(document_file=file_path)
58
  if document:
59
+ print("Document :",document)
60
  texts = process_documents(documents=document)
61
  vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
62
  global qa
 
92
 
93
  def process_pdf_document(document_file):
94
  print("Document File Name :",document_file.name)
95
+ loader = PDFMinerLoader(document_file.name)
96
  document = loader.load()[0]
97
  return document
98