Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -203,7 +203,9 @@ def extract_mime_type(file):
|
|
203 |
else:
|
204 |
raise TypeError("Input should be a string or a streamlit.UploadedFile object")
|
205 |
|
206 |
-
from
|
|
|
|
|
207 |
|
208 |
def extract_file_extension(file):
|
209 |
# get the file name directly from the UploadedFile object
|
@@ -229,8 +231,12 @@ def pdf2txt(docs):
|
|
229 |
|
230 |
# read the file according to its extension
|
231 |
try:
|
232 |
-
if file_extension.lower()
|
233 |
-
|
|
|
|
|
|
|
|
|
234 |
elif file_extension.lower() == 'pdf':
|
235 |
with open(temp_file_name, "rb") as f:
|
236 |
pdf = PdfFileReader(f)
|
@@ -245,6 +251,7 @@ def pdf2txt(docs):
|
|
245 |
return text
|
246 |
|
247 |
|
|
|
248 |
def pdf2txt_old(pdf_docs):
|
249 |
st.write(pdf_docs)
|
250 |
for file in pdf_docs:
|
|
|
203 |
else:
|
204 |
raise TypeError("Input should be a string or a streamlit.UploadedFile object")
|
205 |
|
206 |
+
from PyPDF2 import PdfFileReader
|
207 |
+
import os
|
208 |
+
import re
|
209 |
|
210 |
def extract_file_extension(file):
|
211 |
# get the file name directly from the UploadedFile object
|
|
|
231 |
|
232 |
# read the file according to its extension
|
233 |
try:
|
234 |
+
if file_extension.lower() == 'py':
|
235 |
+
with open(temp_file_name, 'r') as f:
|
236 |
+
text += f.read()
|
237 |
+
elif file_extension.lower() in ['txt', 'html', 'htm', 'xml', 'json']:
|
238 |
+
with open(temp_file_name, 'r') as f:
|
239 |
+
text += f.read()
|
240 |
elif file_extension.lower() == 'pdf':
|
241 |
with open(temp_file_name, "rb") as f:
|
242 |
pdf = PdfFileReader(f)
|
|
|
251 |
return text
|
252 |
|
253 |
|
254 |
+
|
255 |
def pdf2txt_old(pdf_docs):
|
256 |
st.write(pdf_docs)
|
257 |
for file in pdf_docs:
|