Spaces:

awacke1
/

VoiceGPT15

Sleeping

awacke1 committed on Jul 7, 2023

Commit

d11a287

•

1 Parent(s): 03d5e6b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -203,8 +203,7 @@ def extract_mime_type(file):
     else:
         raise TypeError("Input should be a string or a streamlit.UploadedFile object")
-from PyPDF2 import PdfFileReader
-import os
 import re
 def extract_file_extension(file):
@@ -224,34 +223,19 @@ def pdf2txt(docs):
         # print the file extension
         st.write(f"File type extension: {file_extension}")
-        # save the uploaded file temporarily
-        temp_file_name = file.name
-        with open(temp_file_name, "wb") as f:
-            f.write(file.getvalue())
         # read the file according to its extension
         try:
-            if file_extension.lower() == 'py':
-                with open(temp_file_name, 'r') as f:
-                    text += f.read()
-            elif file_extension.lower() in ['txt', 'html', 'htm', 'xml', 'json']:
-                with open(temp_file_name, 'r') as f:
-                    text += f.read()
             elif file_extension.lower() == 'pdf':
-                with open(temp_file_name, "rb") as f:
-                    pdf = PdfFileReader(f)
-                    for page in range(pdf.getNumPages()):
-                        text += pdf.getPage(page).extractText()
         except Exception as e:
             st.write(f"Error processing file {file.name}: {e}")
-        # remove the temporary file
-        os.remove(temp_file_name)
     return text
 def pdf2txt_old(pdf_docs):
     st.write(pdf_docs)
     for file in pdf_docs:

     else:
         raise TypeError("Input should be a string or a streamlit.UploadedFile object")
+from io import BytesIO
 import re
 def extract_file_extension(file):
         # print the file extension
         st.write(f"File type extension: {file_extension}")
         # read the file according to its extension
         try:
+            if file_extension.lower() in ['py', 'txt', 'html', 'htm', 'xml', 'json']:
+                text += file.getvalue().decode('utf-8')
             elif file_extension.lower() == 'pdf':
+                pdf = PdfFileReader(BytesIO(file.getvalue()))
+                for page in range(pdf.getNumPages()):
+                    text += pdf.getPage(page).extractText()
         except Exception as e:
             st.write(f"Error processing file {file.name}: {e}")
     return text
 def pdf2txt_old(pdf_docs):
     st.write(pdf_docs)
     for file in pdf_docs: