Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -205,20 +205,21 @@ def extract_mime_type(file):
|
|
205 |
|
206 |
import textract
|
207 |
import os
|
208 |
-
def extract_file_extension(
|
209 |
-
#
|
210 |
-
|
211 |
-
|
|
|
212 |
if match:
|
213 |
return match.group(1)
|
214 |
else:
|
215 |
-
raise ValueError(f"Unable to extract file extension from {
|
216 |
|
217 |
def pdf2txt(pdf_docs):
|
218 |
text = ""
|
219 |
-
for
|
220 |
-
file_extension = extract_file_extension(
|
221 |
-
#
|
222 |
print(f"File type extension: {file_extension}")
|
223 |
|
224 |
# Simulate file reading
|
@@ -234,6 +235,7 @@ def pdf2txt(pdf_docs):
|
|
234 |
text += f"\nExtracted text from PDF file..."
|
235 |
|
236 |
return text
|
|
|
237 |
|
238 |
def pdf2txt_old(pdf_docs):
|
239 |
st.write(pdf_docs)
|
|
|
205 |
|
206 |
import textract
|
207 |
import os
|
208 |
+
def extract_file_extension(file):
    """Return the extension of an uploaded file's name, without the dot.

    The extension is the text after the LAST dot in ``file.name``, so
    ``"archive.tar.gz"`` yields ``"gz"`` and ``"report.pdf"`` yields ``"pdf"``.

    Args:
        file: Any object exposing a ``name`` string attribute
            (presumably a Streamlit ``UploadedFile`` - confirm against caller).

    Returns:
        The extension string with no leading dot.

    Raises:
        ValueError: If the name contains no dot, or nothing follows the
            last dot (e.g. ``"README"`` or ``"file."``).
    """
    # Assume file is an UploadedFile object and get the name directly
    file_name = file.name

    # Split on the last dot: the previous lazy regex matched from the FIRST
    # dot, so multi-dot names returned "tar.gz" / "report.pdf" as the
    # "extension", and a trailing dot returned an empty string.
    _, dot, extension = file_name.rpartition(".")
    if dot and extension:
        return extension
    raise ValueError(f"Unable to extract file extension from {file_name}")
|
217 |
|
218 |
def pdf2txt(pdf_docs):
|
219 |
text = ""
|
220 |
+
for file in pdf_docs:
|
221 |
+
file_extension = extract_file_extension(file)
|
222 |
+
# print the file extension
|
223 |
print(f"File type extension: {file_extension}")
|
224 |
|
225 |
# Simulate file reading
|
|
|
235 |
text += f"\nExtracted text from PDF file..."
|
236 |
|
237 |
return text
|
238 |
+
|
239 |
|
240 |
def pdf2txt_old(pdf_docs):
|
241 |
st.write(pdf_docs)
|