tomsoderlund committed on
Commit
374bb44
1 Parent(s): 7180e60

paraphrase_text

Browse files
Files changed (2) hide show
  1. app.py +26 -5
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,19 +1,40 @@
1
  import gradio
2
- from transformers import pipeline
 
3
 
4
- def summarize_text(text, min_length, max_length):
5
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
6
  short_text = text[:1024]
7
  summary = summarizer(short_text, max_length, min_length, do_sample=False)
8
  print("** summary", summary)
9
  return summary[0]["summary_text"]
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  gradio_interface = gradio.Interface(
12
- fn=summarize_text,
13
  inputs=[
14
  "text",
15
- gradio.Slider(5, 200, value=30),
16
- gradio.Slider(5, 500, value=130)
17
  ],
18
  outputs="text",
19
  examples=[
 
1
  import gradio
2
+ import torch
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
4
 
5
+ def shorten_text(text, min_length, max_length):
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
  short_text = text[:1024]
8
  summary = summarizer(short_text, max_length, min_length, do_sample=False)
9
  print("** summary", summary)
10
  return summary[0]["summary_text"]
11
 
12
+ def paraphrase_text(text, min_length, max_length):
13
+ tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
14
+ model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ text_instruction = "paraphrase: " + text + " </s>"
17
+ encoding = tokenizer.encode_plus(text_instruction, padding="longest", return_tensors="pt")
18
+ input_ids, attention_masks = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)
19
+ outputs = model.generate(
20
+ input_ids=input_ids, attention_mask=attention_masks,
21
+ max_length=max_length,
22
+ do_sample=True,
23
+ top_k=120,
24
+ top_p=0.95,
25
+ early_stopping=True,
26
+ num_return_sequences=5
27
+ )
28
+ line = tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
29
+ print("** outputs", len(outputs), line)
30
+ return line
31
+
32
  gradio_interface = gradio.Interface(
33
+ fn=paraphrase_text,
34
  inputs=[
35
  "text",
36
+ gradio.Slider(5, 200, value=30, label="Min length"),
37
+ gradio.Slider(5, 500, value=130, label="Max length")
38
  ],
39
  outputs="text",
40
  examples=[
requirements.txt CHANGED
@@ -102,6 +102,7 @@ Pillow==9.2.0
102
  preshed==3.0.8
103
  prometheus-client==0.15.0
104
  prompt-toolkit==3.0.31
 
105
  psutil==5.9.3
106
  psycopg2==2.9.5
107
  ptyprocess==0.7.0
 
102
  preshed==3.0.8
103
  prometheus-client==0.15.0
104
  prompt-toolkit==3.0.31
105
+ protobuf==3.20.0
106
  psutil==5.9.3
107
  psycopg2==2.9.5
108
  ptyprocess==0.7.0