Paula Leonova committed on
Commit
6af2acc
1 Parent(s): c5fa7c2

Add txt file conversion to dataframe

Browse files
Files changed (1) hide show
  1. app.py +17 -2
app.py CHANGED
@@ -42,7 +42,7 @@ with st.form(key='my_form'):
42
 
43
  text_csv_expander = st.expander(label=f'Want to upload multiple texts at once? Expand to upload your text files below.', expanded=False)
44
  with text_csv_expander:
45
- uploaded_text_file = st.file_uploader(label="Upload file(s) that end with the .txt suffix",
46
  accept_multiple_files=True,
47
  type = 'txt')
48
 
@@ -107,9 +107,24 @@ with st.spinner('Loading pretrained models...'):
107
 
108
 
109
  if submit_button or example_button:
110
- if len(text_input) == 0:
111
  st.error("Enter some text to generate a summary")
112
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
114
  # For each body of text, create text chunks of a certain token size required for the transformer
115
  nested_sentences = md.create_nest_sentences(document = text_input, token_max_length = 1024)
 
42
 
43
  text_csv_expander = st.expander(label=f'Want to upload multiple texts at once? Expand to upload your text files below.', expanded=False)
44
  with text_csv_expander:
45
+ uploaded_text_files = st.file_uploader(label="Upload file(s) that end with the .txt suffix",
46
  accept_multiple_files=True,
47
  type = 'txt')
48
 
 
107
 
108
 
109
  if submit_button or example_button:
110
+ if len(text_input) == 0 and uploaded_text_files is None:
111
  st.error("Enter some text to generate a summary")
112
  else:
113
+
114
+ if uploaded_text_files is not None:
115
+ file_names = []
116
+ raw_texts = []
117
+ for uploaded_file in uploaded_text_files:
118
+ text = str(uploaded_file.read(), "utf-8")
119
+ raw_texts.append(text)
120
+ file_names.append(uploaded_file.name)
121
+ # st.write("filename:", uploaded_file.name)
122
+ # st.write(raw_text)
123
+ text_data = pd.DataFrame({'title': file_names,
124
+ 'text': raw_texts})
125
+ st.dataframe(text_data.head())
126
+
127
+
128
  with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
129
  # For each body of text, create text chunks of a certain token size required for the transformer
130
  nested_sentences = md.create_nest_sentences(document = text_input, token_max_length = 1024)