Vaishakhh committed on
Commit
4d53442
1 Parent(s): 7f7215b

Upload ai_re_phraser_py.py

Browse files
Files changed (1) hide show
  1. ai_re_phraser_py.py +114 -0
ai_re_phraser_py.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Ai Re-Phraser.py
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/18bvmXQqMIkk7G0gY_1dUolI08RK6Ajrf
8
+ """
9
+
10
+ !pip install git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git
11
+
12
+ from huggingface_hub import notebook_login
13
+ notebook_login()
14
+
15
+ import os
16
+ from parrot import Parrot
17
+ import torch
18
+ import warnings
19
+ import nltk
20
+ !pip install sentence-splitter
21
+ from sentence_splitter import SentenceSplitter, split_text_into_sentences
22
+ warnings.filterwarnings("ignore")
23
+ parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5")
24
+ splitter = SentenceSplitter(language='en')
25
+ from transformers import PegasusForConditionalGeneration, PegasusTokenizer
26
+ from transformers import AutoTokenizer
27
+ from transformers import AutoModelForSeq2SeqLM
28
+ import pandas as pd
29
+ from parrot.filters import Adequacy
30
+ from parrot.filters import Fluency
31
+ from parrot.filters import Diversity
32
+ adequacy_score = Adequacy()
33
+ fluency_score = Fluency()
34
+ diversity_score= Diversity()
35
# Device string handed to the Parrot adequacy/fluency filters in get_response.
# Fall back to the CPU when no CUDA device is present instead of always
# requesting "cuda:0", which does not exist on CPU-only hosts.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Thresholds passed to the adequacy and fluency filters.
adequacy_threshold = 0.90
fluency_threshold = 0.90

# Ranker name passed to the diversity scorer.
diversity_ranker = "levenshtein"
39
+
40
# Load the Pegasus paraphrase checkpoint and its tokenizer, then place the
# model on the GPU when one is available and on the CPU otherwise.
model_name = 'tuner007/pegasus_paraphrase'
torch_device = 'cpu' if not torch.cuda.is_available() else 'cuda'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model_pegasus = PegasusForConditionalGeneration.from_pretrained(model_name)
model_pegasus = model_pegasus.to(torch_device)
44
+
45
def get_max_str(lst):
    """Return the longest element of ``lst``.

    When several elements share the greatest length, the one encountered
    first is returned. An empty ``lst`` raises ``ValueError``.
    """
    remaining = iter(lst)
    try:
        longest = next(remaining)
    except StopIteration:
        raise ValueError("max() arg is an empty sequence") from None

    longest_len = len(longest)
    for candidate in remaining:
        candidate_len = len(candidate)
        # Strict comparison keeps the earliest element on ties.
        if candidate_len > longest_len:
            longest, longest_len = candidate, candidate_len
    return longest
47
def get_response(input_text, num_return_sequences=10, num_beams=10):
    """Return one paraphrase of ``input_text``.

    Candidates are generated with the Pegasus model using beam search, then
    passed through the adequacy filter, the fluency filter and the diversity
    ranker. The longest surviving candidate is returned. If any filter leaves
    nothing, or the filtering stage raises, the longest raw candidate is
    returned instead.

    Args:
        input_text: Text to paraphrase; it is truncated to 60 tokens.
        num_return_sequences: Number of candidates to generate.
        num_beams: Beam width used for generation.

    Returns:
        The selected paraphrase string.
    """
    # Call the tokenizer directly; ``prepare_seq2seq_batch`` is deprecated.
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors='pt',
    ).to(torch_device)
    # NOTE(review): temperature normally only matters when sampling is
    # enabled -- confirm whether do_sample=True was intended here.
    translated = model_pegasus.generate(
        **batch,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)

    best = None
    try:
        adequate = adequacy_score.filter(
            input_text, tgt_text, adequacy_threshold, device
        )
        fluent = (
            fluency_score.filter(adequate, fluency_threshold, device)
            if adequate
            else []
        )
        if fluent:
            ranked = diversity_score.rank(input_text, fluent, diversity_ranker)
            if ranked:
                best = get_max_str(ranked)
    except Exception:
        # Filtering is deliberately best-effort: on any failure fall back to
        # the unfiltered candidates. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        best = None

    if best is not None:
        return best
    return get_max_str(tgt_text)
64
+
65
+ # importing the Parrot library package
66
+
67
+ from parrot import Parrot
68
+ parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5")
69
+ from transformers import PegasusForConditionalGeneration, PegasusTokenizer
70
+ from transformers import AutoTokenizer
71
+ from transformers import AutoModelForSeq2SeqLM
72
+
73
# Demo run: paraphrase a sample text sentence by sentence and print the
# original next to the result.
txt = "We apologize for keeping you on hold for longer time than excepted we are sorry for that!"

tokens = splitter.split(text=txt)

# Join the per-sentence paraphrases with single spaces so the printed result
# does not start with a stray blank.
txt_paraphrase = ' '.join(
    get_response(phrase, num_return_sequences=10, num_beams=10)
    for phrase in tokens
)

print("*"*25)
print("ORIGINAL TEXT")
print("*"*25)
print(txt)
print("*"*25)
print("PARAPHRASE TEXT")
print("*"*25)
print(txt_paraphrase)
print("*"*25)
91
+
92
+ pip install gradio
93
+
94
+ #pip freeze > requirements.txt
95
+
96
+ """# New Section"""
97
+
98
+ import gradio as gr
99
+
100
def get_fun(txt):
    """Paraphrase ``txt`` sentence by sentence.

    The text is split into sentences with the module-level ``splitter``; each
    sentence is paraphrased with ``get_response`` and the results are joined
    with single spaces.

    Args:
        txt: Input text to rephrase.

    Returns:
        The paraphrased text, without leading or trailing separators. An
        input that yields no non-blank sentences gives an empty string.
    """
    sentences = splitter.split(text=txt)
    # Skip blank segments so empty text is never sent to the model, and use
    # join so the result carries no leading space.
    return ' '.join(
        get_response(sentence, num_return_sequences=10, num_beams=10)
        for sentence in sentences
        if sentence.strip()
    )
108
+
109
# Expose get_fun through a text-in / text-out Gradio interface and start it.
iface = gr.Interface(
    fn=get_fun,
    inputs="text",
    outputs="text",
    title=" Ai Re-Phraser - Quotient Hackathon",
)
iface.launch(inline=False)
111
+
112
+ pip freeze > requirements.txt
113
+
114
+ """# New Section"""