tomsoderlund committed on
Commit
374bb44
1 Parent(s): 7180e60

paraphrase_text

Browse files
Files changed (2) hide show
  1. app.py +26 -5
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,19 +1,40 @@
1
  import gradio
2
- from transformers import pipeline
 
3
 
4
- def summarize_text(text, min_length, max_length):
5
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
6
  short_text = text[:1024]
7
  summary = summarizer(short_text, max_length, min_length, do_sample=False)
8
  print("** summary", summary)
9
  return summary[0]["summary_text"]
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  gradio_interface = gradio.Interface(
12
- fn=summarize_text,
13
  inputs=[
14
  "text",
15
- gradio.Slider(5, 200, value=30),
16
- gradio.Slider(5, 500, value=130)
17
  ],
18
  outputs="text",
19
  examples=[
 
1
  import gradio
2
+ import torch
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
4
 
5
+ def shorten_text(text, min_length, max_length):
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
  short_text = text[:1024]
8
  summary = summarizer(short_text, max_length, min_length, do_sample=False)
9
  print("** summary", summary)
10
  return summary[0]["summary_text"]
11
 
12
+ def paraphrase_text(text, min_length, max_length):
13
+ tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
14
+ model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ text_instruction = "paraphrase: " + text + " </s>"
17
+ encoding = tokenizer.encode_plus(text_instruction, padding="longest", return_tensors="pt")
18
+ input_ids, attention_masks = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)
19
+ outputs = model.generate(
20
+ input_ids=input_ids, attention_mask=attention_masks,
21
+ max_length=max_length,
22
+ do_sample=True,
23
+ top_k=120,
24
+ top_p=0.95,
25
+ early_stopping=True,
26
+ num_return_sequences=5
27
+ )
28
+ line = tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
29
+ print("** outputs", len(outputs), line)
30
+ return line
31
+
32
  gradio_interface = gradio.Interface(
33
+ fn=paraphrase_text,
34
  inputs=[
35
  "text",
36
+ gradio.Slider(5, 200, value=30, label="Min length"),
37
+ gradio.Slider(5, 500, value=130, label="Max length")
38
  ],
39
  outputs="text",
40
  examples=[
requirements.txt CHANGED
@@ -102,6 +102,7 @@ Pillow==9.2.0
102
  preshed==3.0.8
103
  prometheus-client==0.15.0
104
  prompt-toolkit==3.0.31
 
105
  psutil==5.9.3
106
  psycopg2==2.9.5
107
  ptyprocess==0.7.0
 
102
  preshed==3.0.8
103
  prometheus-client==0.15.0
104
  prompt-toolkit==3.0.31
105
+ protobuf==3.20.0
106
  psutil==5.9.3
107
  psycopg2==2.9.5
108
  ptyprocess==0.7.0