awacke1 committed on
Commit
03d5e6b
1 Parent(s): bf4227b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -203,7 +203,9 @@ def extract_mime_type(file):
203
  else:
204
  raise TypeError("Input should be a string or a streamlit.UploadedFile object")
205
 
206
- from io import BytesIO
 
 
207
 
208
  def extract_file_extension(file):
209
  # get the file name directly from the UploadedFile object
@@ -229,8 +231,12 @@ def pdf2txt(docs):
229
 
230
  # read the file according to its extension
231
  try:
232
- if file_extension.lower() in ['txt', 'html', 'htm', 'py', 'xml', 'json', 'docx']:
233
- text += textract.process(temp_file_name).decode("utf-8")
 
 
 
 
234
  elif file_extension.lower() == 'pdf':
235
  with open(temp_file_name, "rb") as f:
236
  pdf = PdfFileReader(f)
@@ -245,6 +251,7 @@ def pdf2txt(docs):
245
  return text
246
 
247
 
 
248
  def pdf2txt_old(pdf_docs):
249
  st.write(pdf_docs)
250
  for file in pdf_docs:
 
203
  else:
204
  raise TypeError("Input should be a string or a streamlit.UploadedFile object")
205
 
206
+ from PyPDF2 import PdfFileReader
207
+ import os
208
+ import re
209
 
210
  def extract_file_extension(file):
211
  # get the file name directly from the UploadedFile object
 
231
 
232
  # read the file according to its extension
233
  try:
234
+ if file_extension.lower() == 'py':
235
+ with open(temp_file_name, 'r') as f:
236
+ text += f.read()
237
+ elif file_extension.lower() in ['txt', 'html', 'htm', 'xml', 'json']:
238
+ with open(temp_file_name, 'r') as f:
239
+ text += f.read()
240
  elif file_extension.lower() == 'pdf':
241
  with open(temp_file_name, "rb") as f:
242
  pdf = PdfFileReader(f)
 
251
  return text
252
 
253
 
254
+
255
  def pdf2txt_old(pdf_docs):
256
  st.write(pdf_docs)
257
  for file in pdf_docs: