Samarth991 committed on
Commit
1c52547
1 Parent(s): 5a2a128

adding online PDF loader

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -48,13 +48,13 @@ def document_loader(file_data,api_key,doc_type='pdf',llm='Huggingface'):
48
  embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
49
  document = None
50
  if doc_type == 'pdf':
51
- document = process_pdf_document(document_file_name=file_data)
52
  elif doc_type == 'text':
53
- document = process_text_document(document_file_name=file_data)
54
  elif doc_type == 'csv':
55
- document = process_csv_document(document_file_name=file_data)
56
  elif doc_type == 'word':
57
- document = process_word_document(document_file_name=file_data)
58
  if document:
59
  texts = process_documents(documents=document)
60
  vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
@@ -71,20 +71,20 @@ def document_loader(file_data,api_key,doc_type='pdf',llm='Huggingface'):
71
  return "Ready..."
72
 
73
 
74
- def process_text_document(document_file_name):
75
- loader = TextLoader(document_file_name)
76
  document = loader.load()
77
  return document
78
 
79
 
80
- def process_csv_document(document_file_name):
81
- loader = CSVLoader(file_path=document_file_name)
82
  document = loader.load()
83
  return document
84
 
85
 
86
- def process_word_document(document_file_name):
87
- loader = UnstructuredWordDocumentLoader(file_path=document_file_name)
88
  document = loader.load()
89
  return document
90
 
 
48
  embedding_model = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2',model_kwargs={"device": DEVICE})
49
  document = None
50
  if doc_type == 'pdf':
51
+ document = process_pdf_document(document_file=file_data)
52
  elif doc_type == 'text':
53
+ document = process_text_document(document_file=file_data)
54
  elif doc_type == 'csv':
55
+ document = process_csv_document(document_file=file_data)
56
  elif doc_type == 'word':
57
+ document = process_word_document(document_file=file_data)
58
  if document:
59
  texts = process_documents(documents=document)
60
  vector_db = FAISS.from_documents(documents=texts, embedding= embedding_model)
 
71
  return "Ready..."
72
 
73
 
74
+ def process_text_document(document_file):
75
+ loader = TextLoader(document_file.name)
76
  document = loader.load()
77
  return document
78
 
79
 
80
+ def process_csv_document(document_file):
81
+ loader = CSVLoader(file_path=document_file.name)
82
  document = loader.load()
83
  return document
84
 
85
 
86
+ def process_word_document(document_file):
87
+ loader = UnstructuredWordDocumentLoader(file_path=document_file.name)
88
  document = loader.load()
89
  return document
90