File size: 3,208 Bytes
4d53442 6c4cc42 d5e2292 4d53442 1dd6e7f e5a4bcd f7f2b99 54acd71 4d53442 54acd71 655bbff 4d53442 54acd71 4d53442 54acd71 6b20835 54acd71 6b20835 54acd71 4d53442 54acd71 4d53442 9727b9e 655bbff 54acd71 4d53442 9727b9e 4d53442 9d66f2e aae65d2 922bde8 dddbd3c 4d53442 83b8e1f 2ddaf73 83b8e1f 2ddaf73 83b8e1f 4d53442 83b8e1f 4d53442 83b8e1f 4d53442 54acd71 4d53442 83b8e1f 4d53442 9d66f2e 4d53442 f677b84 4d53442 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# -*- coding: utf-8 -*-
"""Ai Re-Phraser.py
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18bvmXQqMIkk7G0gY_1dUolI08RK6Ajrf
"""
# importing the libraries
import os
import pandas as pd
from parrot import Parrot #Parrot offers knobs to control Adequacy, Fluency and Diversity metrics
import torch
import warnings
import nltk
import warnings
warnings.filterwarnings("ignore")
from sentence_splitter import SentenceSplitter, split_text_into_sentences
splitter = SentenceSplitter(language='en')
from parrot import Parrot
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM
from parrot.filters import Adequacy
from parrot.filters import Fluency
from parrot.filters import Diversity
# ---- Paraphrase-quality metrics ----
# Parrot's filters score candidate paraphrases on three axes.
adequacy_score = Adequacy()       # how well the meaning of the input is preserved
fluency_score = Fluency()         # how grammatical/natural the English output is
diversity_score = Diversity()     # how different the paraphrase is from the input

# ---- Device selection ----
# BUG FIX: `device` was hard-coded to "cuda:0", which crashes the adequacy /
# fluency filters on CPU-only machines; fall back to CPU like `torch_device`.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# ---- Filtering / ranking knobs ----
adequacy_threshold = 0.90         # minimum adequacy score a candidate must reach
fluency_threshold = 0.80          # minimum fluency score a candidate must reach
diversity_ranker = "levenshtein"  # edit-distance ranker for candidate diversity
do_diverse = False                # whether generation uses diverse beam groups
# num_beam_groups (int) — number of groups num_beams is divided into to ensure
# diversity among different groups of beams (unused while do_diverse is False).

# ---- Model ----
# Pegasus fine-tuned for sentence-level paraphrasing.
model_name = 'tuner007/pegasus_paraphrase'
tokenizer = PegasusTokenizer.from_pretrained(model_name)  # pre-trained tokenizer
model_pegasus = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
def get_max_str(lst):
    """Return the longest string in *lst* (first one wins on ties).

    Raises ValueError when *lst* is empty, matching ``max()`` semantics.
    """
    longest = None
    for candidate in lst:
        if longest is None or len(candidate) > len(longest):
            longest = candidate
    if longest is None:
        raise ValueError("get_max_str() arg is an empty sequence")
    return longest
def get_response(input_text):
    """Paraphrase one sentence with Pegasus and return the best candidate.

    Generates 15 beam-search candidates, then filters them by adequacy and
    fluency and ranks the survivors by diversity; the longest surviving
    string is returned.  If a filter empties the pool (or scoring fails),
    falls back to the longest raw candidate.

    :param input_text: a single English sentence to paraphrase.
    :return: the selected paraphrase string.
    """
    # tokenizer(...) replaces the deprecated `prepare_seq2seq_batch` API,
    # which was removed from recent transformers releases.
    batch = tokenizer([input_text], truncation=True, padding='longest',
                      max_length=90, return_tensors='pt').to(torch_device)
    translated = model_pegasus.generate(**batch, max_length=30, num_beams=15,
                                        num_return_sequences=15, temperature=1.5)
    # num_beam_groups=num_beams, diversity_penalty=0.5
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    try:
        adequacy_filtered = adequacy_score.filter(
            input_text, tgt_text, adequacy_threshold, device)
        if not adequacy_filtered:
            # Nothing passed the adequacy bar; fall back to the raw pool.
            # (The original code raised from max([]) and relied on a bare
            # except to land here — this is the same net behavior, explicit.)
            return get_max_str(tgt_text)
        fluency_filtered = fluency_score.filter(
            adequacy_filtered, fluency_threshold, device)
        if not fluency_filtered:
            return get_max_str(tgt_text)
        diversity_scored = diversity_score.rank(
            input_text, fluency_filtered, diversity_ranker)
        return get_max_str(diversity_scored)
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); best-effort fallback is kept.
        return get_max_str(tgt_text)
# Deploying the model
import gradio as gr
def get_fun(Input_txt):
    """Paraphrase free-form text sentence by sentence.

    Splits *Input_txt* into sentences, paraphrases each with
    :func:`get_response`, and rejoins them with single spaces.

    :param Input_txt: the user's input text (possibly multiple sentences).
    :return: the paraphrased text.
    """
    sentences = splitter.split(text=Input_txt)
    # BUG FIX: the old `'' + ' ' + tmp` accumulation loop prefixed the
    # result with a stray leading space; ' '.join does not.
    return ' '.join(get_response(sentence) for sentence in sentences)
# Build the Gradio web UI: a single text box in, a single text box out.
iface = gr.Interface(fn=get_fun, inputs="text", outputs="text", title = "Ai Re-Phraser")
# inline=False serves the app at its own URL instead of embedding it
# in the notebook output cell.
iface.launch(inline=False)
"""# New Section""" |