Update app.py
Browse files
app.py
CHANGED
@@ -120,15 +120,44 @@ def main():
|
|
120 |
for file in docs:
|
121 |
print('file - type : ', file.type)
|
122 |
if file.type == 'text/plain':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
# file is .txt
|
124 |
doc_list.extend(get_text_file(file))
|
125 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
# file is .pdf
|
127 |
doc_list.extend(get_pdf_text(file))
|
128 |
elif file.type == 'text/csv':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
# file is .csv
|
130 |
doc_list.extend(get_csv_file(file))
|
131 |
elif file.type == 'application/json':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
# file is .json
|
133 |
doc_list.extend(get_json_file(file))
|
134 |
|
|
|
120 |
for file in docs:
|
121 |
print('file - type : ', file.type)
|
122 |
if file.type == 'text/plain':
|
123 |
+
def get_text_file(text_file):
    """Load an uploaded plain-text file into documents via ``TextLoader``.

    The upload is copied to a named temporary file because the loader is
    given a filesystem path rather than an in-memory buffer.

    Args:
        text_file: Uploaded file object exposing ``getvalue()``.
            Presumably returns bytes, since the temporary file is opened
            in its default binary mode -- confirm against the caller.

    Returns:
        The result of ``TextLoader(...).load()`` for the temporary copy.
    """
    with NamedTemporaryFile() as temp_file:
        # Fixed: the original wrote ``text_chunks.getvalue()``, but no
        # ``text_chunks`` name is defined in this function; the uploaded
        # file is the ``text_file`` parameter.
        temp_file.write(text_file.getvalue())
        # Push buffered bytes to disk so the loader, which reopens the file
        # by name, sees the full content.
        temp_file.flush()
        # Load while the temporary file still exists; it is removed as soon
        # as the ``with`` block exits.
        return TextLoader(temp_file.name).load()
|
130 |
# file is .txt
|
131 |
doc_list.extend(get_text_file(file))
|
132 |
elif file.type in ['application/octet-stream', 'application/pdf']:
|
133 |
+
def get_pdf_text(pdf_docs):
    """Load an uploaded PDF into documents via ``PyPDFLoader``.

    Args:
        pdf_docs: Uploaded file object exposing ``getvalue()``.

    Returns:
        The result of ``PyPDFLoader(...).load()`` for a temporary on-disk
        copy of the upload.
    """
    with NamedTemporaryFile() as scratch:
        payload = pdf_docs.getvalue()
        scratch.write(payload)
        # Rewinding also flushes the write buffer, so the loader (which
        # opens the file by path) reads the complete content.
        scratch.seek(0)
        # Must run inside the ``with``: the file is deleted on exit.
        pages = PyPDFLoader(scratch.name).load()
    return pages
|
140 |
# file is .pdf
|
141 |
doc_list.extend(get_pdf_text(file))
|
142 |
elif file.type == 'text/csv':
|
143 |
+
def get_csv_file(csv_file):
    """Load an uploaded CSV file into documents via ``CSVLoader``.

    Args:
        csv_file: Uploaded file object exposing ``getvalue()``.

    Returns:
        The result of ``CSVLoader(...).load()`` for a temporary on-disk
        copy of the upload.
    """
    raw = csv_file.getvalue()
    with NamedTemporaryFile() as tmp:
        tmp.write(raw)
        # Rewind (which also flushes) before the loader reopens the file
        # by its path.
        tmp.seek(0)
        loader = CSVLoader(tmp.name)
        # Load before leaving the ``with`` block, while the file exists.
        return loader.load()
|
150 |
+
|
151 |
# file is .csv
|
152 |
doc_list.extend(get_csv_file(file))
|
153 |
elif file.type == 'application/json':
|
154 |
+
def get_json_file(json_file, jq_schema="."):
    """Load an uploaded JSON file into documents via ``JSONLoader``.

    Args:
        json_file: Uploaded file object exposing ``getvalue()``.
        jq_schema: jq expression selecting the content to extract.
            Defaults to ``"."`` (the whole document), so existing
            single-argument callers keep working.

    Returns:
        The result of ``JSONLoader(...).load()`` for a temporary on-disk
        copy of the upload.
    """
    with NamedTemporaryFile() as temp_file:
        temp_file.write(json_file.getvalue())
        # Flush so the loader, which reopens the file by name, sees the
        # complete content.
        temp_file.flush()
        # Fixed: the original called ``JSONLoader(temp_file.name)`` with no
        # ``jq_schema``. Assuming this is LangChain's JSONLoader (import not
        # visible here), that argument is required and the call raised
        # TypeError. ``text_content=False`` lets non-string JSON values
        # (objects, arrays) be loaded instead of rejected.
        json_loader = JSONLoader(
            temp_file.name,
            jq_schema=jq_schema,
            text_content=False,
        )
        # Load while the temporary file still exists.
        return json_loader.load()
|
161 |
# file is .json
|
162 |
doc_list.extend(get_json_file(file))
|
163 |
|