pszemraj committed on
Commit
0407083
1 Parent(s): 9e8f29e

🚧 streamline vars

Browse files

Signed-off-by: peter szemraj <peterszemraj@gmail.com>

Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -241,21 +241,24 @@ def proc_submission(
241
 
242
  st = time.perf_counter()
243
  history = {}
244
- clean_text = clean(input_text, lower=False)
245
- clean_text = remove_stopwords(clean_text) if predrop_stopwords else clean_text
246
  logging.info(
247
- f"pre-truncation word count: {len(contraction_aware_tokenize(clean_text))}"
 
 
 
248
  )
249
- truncation_validated = truncate_word_count(clean_text, max_words=max_input_length)
250
 
251
  if truncation_validated["was_truncated"]:
252
  model_input_text = truncation_validated["processed_text"]
253
  # create elaborate HTML warning
254
- input_wc = re.split(r"\s+", input_text)
255
  msg = f"""
256
  <div style="background-color: #FFA500; color: white; padding: 20px;">
257
  <h3>Warning</h3>
258
  <p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
 
259
  </div>
260
  """
261
  logging.warning(msg)
 
241
 
242
  st = time.perf_counter()
243
  history = {}
244
+ cln_text = clean(input_text, lower=False)
245
+ parsed_cln_text = remove_stopwords(cln_text) if predrop_stopwords else cln_text
246
  logging.info(
247
+ f"pre-truncation word count: {len(contraction_aware_tokenize(parsed_cln_text))}"
248
+ )
249
+ truncation_validated = truncate_word_count(
250
+ parsed_cln_text, max_words=max_input_length
251
  )
 
252
 
253
  if truncation_validated["was_truncated"]:
254
  model_input_text = truncation_validated["processed_text"]
255
  # create elaborate HTML warning
256
+ input_wc = len(contraction_aware_tokenize(parsed_cln_text))
257
  msg = f"""
258
  <div style="background-color: #FFA500; color: white; padding: 20px;">
259
  <h3>Warning</h3>
260
  <p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
261
+ <p>Dropping stopwords is set to {predrop_stopwords}. If this is not what you intended, please validate the advanced settings.</p>
262
  </div>
263
  """
264
  logging.warning(msg)