twigs committed
Commit 98ce4ad
1 Parent(s): b4f09d3

Update app.py

Files changed (1)
  1. app.py +10 -14
app.py CHANGED
@@ -20,11 +20,10 @@ simpl_model = BartForConditionalGeneration.from_pretrained(
     'twigs/bart-text2text-simplifier')
 cwi_pipe = pipeline('text-classification', model=cwi_model,
                     tokenizer=cwi_tok, function_to_apply='none')
-fill_pipe = pipeline('fill-mask', model=simpl_model,
-                     tokenizer=simpl_tok, top_k=1)
+fill_pipe = pipeline('fill-mask', top_k=1)
 
 
-def id_replace_complex(s, threshold=0.4):
+def id_replace_complex(s, threshold=0.2):
 
     # get all tokens
     tokens = re.compile('\w+').findall(s)
@@ -34,19 +33,16 @@ def id_replace_complex(s, threshold=0.4):
     compl_tok = [tokens[idx] for idx, x in enumerate(
         cwi_pipe(cands)) if x['score'] >= threshold]
 
-    replacements = []
-    # potentially parallelizable, depends on desired behaviour
-    for t in compl_tok:
-        idx = s.index(t)
-        s = s[:idx] + '<mask>' + s[idx+len(t):]
-        # get top candidate for mask fill in complex token
-        top_result = fill_pipe(s)[0]
-        s = top_result['sequence']
-        print(s)
-        replacements.append(top_result['token_str'])
+    masked = [s[:s.index(t)] + '<mask>' + s[s.index(t)+len(t):] for t in compl_tok]
+    cands = fill_pipe(masked)
+    replacements = [el['token_str'][1:] if type(el) == dict else el[0]['token_str'][1:] for el in cands]
+
+    for i, el in enumerate(compl_tok):
+        idx = s.index(el)
+        s = s[:idx] + replacements[i] + s[idx+len(el):]
+
     return s, compl_tok, replacements
 
-
 def generate_candidate_text(s, model, tokenizer, tokenized=False):
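For context, a minimal standalone sketch of the batched mask-and-fill flow this commit switches to; the example sentence, the complex-token list, and the fallback to the pipeline's default fill-mask checkpoint are illustrative assumptions, not part of the commit:

    from transformers import pipeline

    # With no model argument, the fill-mask pipeline loads its default
    # masked-LM checkpoint (an assumption here; the commit drops the BART
    # simplifier model/tokenizer from this pipeline).
    fill_pipe = pipeline('fill-mask', top_k=1)

    s = "The committee reached a unanimous verdict."
    compl_tok = ["unanimous", "verdict"]  # hypothetical tokens flagged as complex

    # One masked copy of the sentence per complex token, filled in a single
    # batch instead of the old one-call-per-token loop.
    masked = [s[:s.index(t)] + '<mask>' + s[s.index(t) + len(t):] for t in compl_tok]
    cands = fill_pipe(masked)

    # Depending on the transformers version, top_k=1 yields a dict per input
    # or a one-element list; token_str often carries a leading space, hence
    # the strip.
    replacements = [(el if isinstance(el, dict) else el[0])['token_str'].strip()
                    for el in cands]

    # Splice each top candidate back into the sentence in place of the
    # complex token it replaces.
    for t, r in zip(compl_tok, replacements):
        idx = s.index(t)
        s = s[:idx] + r + s[idx + len(t):]

    print(s)

Collecting all masked sentences and calling fill_pipe once replaces the old per-token loop, so the fill-mask pipeline is invoked a single time per input sentence rather than once per complex word.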