seanpedrickcase committed on
Commit
49679bb
1 Parent(s): 6417426

Couple of changes to requirements

Browse files
Files changed (2) hide show
  1. app.py +53 -35
  2. requirements.txt +2 -0
app.py CHANGED
@@ -150,12 +150,12 @@ model_type = "Flan T5 Large Stacked Samsum 1k"
150
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
151
 
152
  model_type = "Long T5 Global Base 16k Book Summary"
153
- load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
154
 
155
  today = datetime.now().strftime("%d%m%Y")
156
  today_rev = datetime.now().strftime("%Y%m%d")
157
 
158
- def summarise_text(text, text_df, length_slider, in_colname, model_type):
159
 
160
  if text_df.empty:
161
  in_colname="text"
@@ -164,15 +164,30 @@ def summarise_text(text, text_df, length_slider, in_colname, model_type):
164
  in_text_df = pd.DataFrame({in_colname_list_first:[text]})
165
 
166
  else:
167
- in_text_df = text_df #pd.read_csv(text_df.name, delimiter = ",", low_memory=False, encoding='cp1252')
168
- in_colname_list_first = in_colname.tolist()[0][0]
169
 
170
  print(model_type)
171
 
 
 
172
  if model_type != "Mistral Nous Capybara 4k (larger, slow)":
173
- summarised_text = chatf.model(list(in_text_df[in_colname_list_first]), max_length=length_slider)
 
 
 
 
 
 
 
 
 
174
 
175
- print(summarised_text)
 
 
 
 
176
 
177
  if model_type == "Mistral Nous Capybara 4k (larger, slow)":
178
 
@@ -180,47 +195,52 @@ def summarise_text(text, text_df, length_slider, in_colname, model_type):
180
 
181
  from chatfuncs.prompts import nous_capybara_prompt
182
 
183
- formatted_string = nous_capybara_prompt.format(length=length, text=text)
184
- #formatted_string = open_hermes_prompt.format(length=length, text=text)
185
 
186
- # print(formatted_string)
187
 
188
- #for output in chatf.model(formatted_string, max_length = 1000):#, stream=True):
189
- for output in chatf.model(formatted_string, max_length = 10000):#, stream=True):
190
- print(output, end="", flush=True)
191
 
192
- output_str = output['generated_text']
 
193
 
194
- # Find the index of 'ASSISTANT: ' to select only text after this location
195
- index = output_str.find('ASSISTANT: ')
196
 
197
- # Check if 'ASSISTANT: ' is found in the string
198
- if index != -1:
199
- # Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
200
- start_index = index + len('ASSISTANT: ')
201
-
202
- # Slice the string from this point to the end
203
- assistant_text = output_str[start_index:]
204
- else:
205
- assistant_text = "ASSISTANT: not found in text"
206
 
207
- print(assistant_text)
208
 
209
- summarised_text = assistant_text#chatf.model(formatted_string, max_length = 1000)#, max_new_tokens=length_slider)
 
210
 
211
- #summarised_text = "Mistral Nous Capybara 4k summaries currently not working. Sorry!"
 
 
 
 
 
 
 
 
212
 
213
- #rint(summarised_text)
 
 
 
 
 
 
214
 
215
  if text_df.empty:
216
  if model_type != "Mistral Nous Capybara 4k (larger, slow)":
217
- summarised_text_out = summarised_text[0].values()
218
 
219
  if model_type == "Mistral Nous Capybara 4k (larger, slow)":
220
- summarised_text_out = summarised_text
221
 
222
  else:
223
- summarised_text_out = [d['summary_text'] for d in summarised_text] #summarised_text[0].values()
224
 
225
  output_name = "summarise_output_" + today_rev + ".csv"
226
  output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
@@ -253,10 +273,8 @@ with block:
253
  in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)
254
 
255
  with gr.Accordion("Summarise open text from a file", open = False):
256
- in_text_df = gr.File(label="Input text from file")
257
- in_colname = gr.Dataframe(label="Write the column name for the open text to summarise",
258
- type="numpy", row_count=(1,"fixed"), col_count = (1,"fixed"),
259
- headers=["Open text column name"])#, "Address column name 2", "Address column name 3", "Address column name 4"])
260
 
261
  with gr.Row():
262
  summarise_btn = gr.Button("Summarise")
 
150
  load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
151
 
152
  model_type = "Long T5 Global Base 16k Book Summary"
153
+ load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
154
 
155
  today = datetime.now().strftime("%d%m%Y")
156
  today_rev = datetime.now().strftime("%Y%m%d")
157
 
158
+ def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):
159
 
160
  if text_df.empty:
161
  in_colname="text"
 
164
  in_text_df = pd.DataFrame({in_colname_list_first:[text]})
165
 
166
  else:
167
+ in_text_df = text_df
168
+ in_colname_list_first = in_colname
169
 
170
  print(model_type)
171
 
172
+ texts_list = list(in_text_df[in_colname_list_first])
173
+
174
  if model_type != "Mistral Nous Capybara 4k (larger, slow)":
175
+ summarised_texts = []
176
+
177
+ for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
178
+ summarised_text = chatf.model(single_text, max_length=length_slider)
179
+
180
+ #print(summarised_text)
181
+
182
+ summarised_text_str = summarised_text[0]['summary_text']
183
+
184
+ summarised_texts.append(summarised_text_str)
185
 
186
+ print(summarised_text_str)
187
+
188
+ #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
189
+
190
+ #print(summarised_texts)
191
 
192
  if model_type == "Mistral Nous Capybara 4k (larger, slow)":
193
 
 
195
 
196
  from chatfuncs.prompts import nous_capybara_prompt
197
 
198
+ summarised_texts = []
 
199
 
200
+ for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
201
 
202
+ formatted_string = nous_capybara_prompt.format(length=length, text=single_text)
 
 
203
 
204
+ # print(formatted_string)
205
+ output = chatf.model(formatted_string, max_length = 10000)
206
 
207
+ #for output in chatf.model(formatted_string, max_length = 10000):#, stream=True):
208
+ # print(output, end="", flush=True)
209
 
210
+ print(output)
 
 
 
 
 
 
 
 
211
 
212
+ output_str = output[0]['generated_text']
213
 
214
+ # Find the index of 'ASSISTANT: ' to select only text after this location
215
+ index = output_str.find('ASSISTANT: ')
216
 
217
+ # Check if 'ASSISTANT: ' is found in the string
218
+ if index != -1:
219
+ # Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
220
+ start_index = index + len('ASSISTANT: ')
221
+
222
+ # Slice the string from this point to the end
223
+ assistant_text = output_str[start_index:]
224
+ else:
225
+ assistant_text = "ASSISTANT: not found in text"
226
 
227
+ print(assistant_text)
228
+
229
+ summarised_texts.append(assistant_text)
230
+
231
+ #print(summarised_text)
232
+
233
+ #pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
234
 
235
  if text_df.empty:
236
  if model_type != "Mistral Nous Capybara 4k (larger, slow)":
237
+ summarised_text_out = summarised_texts[0]#.values()
238
 
239
  if model_type == "Mistral Nous Capybara 4k (larger, slow)":
240
+ summarised_text_out = summarised_texts[0]
241
 
242
  else:
243
+ summarised_text_out = summarised_texts #[d['summary_text'] for d in summarised_texts] #summarised_text[0].values()
244
 
245
  output_name = "summarise_output_" + today_rev + ".csv"
246
  output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
 
273
  in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)
274
 
275
  with gr.Accordion("Summarise open text from a file", open = False):
276
+ in_text_df = gr.File(label="Input text from file", file_count='multiple')
277
+ in_colname = gr.Dropdown(label="Write the column name for the open text to summarise")
 
 
278
 
279
  with gr.Row():
280
  summarise_btn = gr.Button("Summarise")
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  gradio==3.50.0
2
  transformers
3
  torch
 
 
4
  ctransformers[cuda]
 
1
  gradio==3.50.0
2
  transformers
3
  torch
4
+ pyarrow
5
+ openpyxl
6
  ctransformers[cuda]