awacke1 committed on
Commit
22c8575
1 Parent(s): 89a7198

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -205,20 +205,21 @@ def extract_mime_type(file):
205
 
206
  import textract
207
  import os
208
- def extract_file_extension(file_str):
209
- # Using regex pattern matching to find the file extension
210
- pattern = r"name='.*?\.(.*?)'"
211
- match = re.search(pattern, file_str)
 
212
  if match:
213
  return match.group(1)
214
  else:
215
- raise ValueError(f"Unable to extract file extension from {file_str}")
216
 
217
  def pdf2txt(pdf_docs):
218
  text = ""
219
- for file_str in pdf_docs:
220
- file_extension = extract_file_extension(file_str)
221
- # Print the file extension
222
  print(f"File type extension: {file_extension}")
223
 
224
  # Simulate file reading
@@ -234,6 +235,7 @@ def pdf2txt(pdf_docs):
234
  text += f"\nExtracted text from PDF file..."
235
 
236
  return text
 
237
 
238
  def pdf2txt_old(pdf_docs):
239
  st.write(pdf_docs)
 
205
 
206
  import textract
207
  import os
208
+ def extract_file_extension(file):
209
+ # Assume file is an UploadedFile object and get the name directly
210
+ file_name = file.name
211
+ pattern = r".*?\.(.*?)$"
212
+ match = re.search(pattern, file_name)
213
  if match:
214
  return match.group(1)
215
  else:
216
+ raise ValueError(f"Unable to extract file extension from {file_name}")
217
 
218
  def pdf2txt(pdf_docs):
219
  text = ""
220
+ for file in pdf_docs:
221
+ file_extension = extract_file_extension(file)
222
+ # print the file extension
223
  print(f"File type extension: {file_extension}")
224
 
225
  # Simulate file reading
 
235
  text += f"\nExtracted text from PDF file..."
236
 
237
  return text
238
+
239
 
240
  def pdf2txt_old(pdf_docs):
241
  st.write(pdf_docs)