Heiko Hotz committed on
Commit
9b6d1c0
1 Parent(s): 55ad2ee

initial commit

Browse files
Files changed (1) hide show
  1. app.py +33 -33
app.py CHANGED
@@ -79,39 +79,39 @@ with open('data/'+contracts_files[idx]) as f:
79
  # print(user_upload)
80
 
81
  # process upload
82
- if user_upload is not None:
83
- print(user_upload.name, user_upload.type)
84
- extension = user_upload.name.split('.')[-1].lower()
85
- if extension == 'txt':
86
- print('text file uploaded')
87
- # To convert to a string based IO:
88
- stringio = StringIO(user_upload.getvalue().decode("utf-8"))
89
-
90
- # To read file as string:
91
- contract_data = stringio.read()
92
-
93
- elif extension == 'pdf':
94
- import PyPDF4
95
- try:
96
- # Extracting Text from PDFs
97
- pdfReader = PyPDF4.PdfFileReader(user_upload)
98
- print(pdfReader.numPages)
99
- contract_data = ''
100
- for i in range(0, pdfReader.numPages):
101
-
102
- print(i)
103
- pageobj = pdfReader.getPage(i)
104
- contract_data = contract_data + pageobj.extractText()
105
- except:
106
- st.warning('Unable to read PDF, please try another file')
107
-
108
- elif extension == 'docx':
109
- import docx2txt
110
-
111
- contract_data = docx2txt.process(user_upload)
112
-
113
- else:
114
- st.warning('Unknown uploaded file type, please try again')
115
 
116
  results_drop = ['1', '2', '3']
117
  number_results = st.sidebar.selectbox('Select number of results', results_drop)
 
79
  # print(user_upload)
80
 
81
  # process upload
82
+ # if user_upload is not None:
83
+ # print(user_upload.name, user_upload.type)
84
+ # extension = user_upload.name.split('.')[-1].lower()
85
+ # if extension == 'txt':
86
+ # print('text file uploaded')
87
+ # # To convert to a string based IO:
88
+ # stringio = StringIO(user_upload.getvalue().decode("utf-8"))
89
+ #
90
+ # # To read file as string:
91
+ # contract_data = stringio.read()
92
+ #
93
+ # elif extension == 'pdf':
94
+ # import PyPDF4
95
+ # try:
96
+ # # Extracting Text from PDFs
97
+ # pdfReader = PyPDF4.PdfFileReader(user_upload)
98
+ # print(pdfReader.numPages)
99
+ # contract_data = ''
100
+ # for i in range(0, pdfReader.numPages):
101
+ #
102
+ # print(i)
103
+ # pageobj = pdfReader.getPage(i)
104
+ # contract_data = contract_data + pageobj.extractText()
105
+ # except:
106
+ # st.warning('Unable to read PDF, please try another file')
107
+ #
108
+ # elif extension == 'docx':
109
+ # import docx2txt
110
+ #
111
+ # contract_data = docx2txt.process(user_upload)
112
+ #
113
+ # else:
114
+ # st.warning('Unknown uploaded file type, please try again')
115
 
116
  results_drop = ['1', '2', '3']
117
  number_results = st.sidebar.selectbox('Select number of results', results_drop)