# -*- coding: utf-8 -*-
"""Ai Re-Phraser.py

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/18bvmXQqMIkk7G0gY_1dUolI08RK6Ajrf
"""

# --- Imports ---------------------------------------------------------------
# (duplicate `import warnings` and duplicate `from parrot import Parrot`
# from the notebook export have been removed; nothing else was dropped)
import os
import warnings

import nltk
import pandas as pd
import torch

# Parrot offers knobs to control Adequacy, Fluency and Diversity metrics
from parrot import Parrot
from parrot.filters import Adequacy, Diversity, Fluency
from sentence_splitter import SentenceSplitter, split_text_into_sentences
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    PegasusForConditionalGeneration,
    PegasusTokenizer,
)

warnings.filterwarnings("ignore")

# Sentence splitter used to paraphrase long inputs one sentence at a time.
splitter = SentenceSplitter(language='en')

# --- Paraphrase-quality metrics --------------------------------------------
adequacy_score = Adequacy()
fluency_score = Fluency()
diversity_score = Diversity()

# BUG FIX: the original hard-coded device = "cuda:0", which makes the
# adequacy/fluency filters crash on CPU-only machines. Gate on actual CUDA
# availability, consistent with how torch_device is chosen further down.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

adequacy_threshold = 0.90   # Adequacy of the English sentences
fluency_threshold = 0.80    # English Fluency
# Diversity (Lexical / Phrasal / Syntactical)
# (How much has the paraphrase changed the original sentence?)
diversity_ranker = "levenshtein"
do_diverse = False  # Diverse the sentence formation
# num_beam_groups (int) — Number of groups to divide num_beams into in order
# to ensure diversity among different groups of beams

# --- Model -----------------------------------------------------------------
model_name = 'tuner007/pegasus_paraphrase'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = PegasusTokenizer.from_pretrained(model_name)  # pre-trained tokenizer
model_pegasus = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)


def get_max_str(lst):
    """Return the longest string in *lst* (raises ValueError if empty)."""
    return max(lst, key=len)


def get_response(input_text):
    """Paraphrase one sentence and return the best candidate.

    Generates 15 beam-search candidates with Pegasus, then keeps only the
    ones that clear the adequacy and fluency thresholds, ranks the
    survivors by diversity, and returns the longest of the top-ranked set.
    Falls back to the longest raw candidate when filtering removes
    everything or the Parrot filters fail.
    """
    # FIX: tokenizer.prepare_seq2seq_batch is deprecated (removed in recent
    # transformers); calling the tokenizer directly is the documented
    # replacement and produces the same encoded batch.
    batch = tokenizer([input_text], truncation=True, padding='longest',
                      max_length=90, return_tensors='pt').to(torch_device)
    translated = model_pegasus.generate(
        **batch, max_length=30, num_beams=15, num_return_sequences=15,
        temperature=1.5)  # num_beam_groups=num_beams, diversity_penalty=0.5
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    try:
        adequacy_filtered = adequacy_score.filter(
            input_text, tgt_text, adequacy_threshold, device)
        if not adequacy_filtered:
            # FIX: the original called get_max_str([]) here, deliberately
            # raising ValueError to reach the bare-except fallback; return
            # the same fallback value explicitly instead.
            return get_max_str(tgt_text)
        fluency_filtered = fluency_score.filter(
            adequacy_filtered, fluency_threshold, device)
        if not fluency_filtered:
            return get_max_str(tgt_text)  # same explicit fallback as above
        diversity_scored = diversity_score.rank(
            input_text, fluency_filtered, diversity_ranker)
        return get_max_str(diversity_scored)
    except Exception:
        # FIX: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; on any filter failure fall back to the
        # longest unfiltered candidate, as before.
        return get_max_str(tgt_text)


# --- Deploying the model ---------------------------------------------------
import gradio as gr


def get_fun(Input_txt):
    """Split *Input_txt* into sentences, paraphrase each, rejoin the result.

    Note: like the original, every sentence (including the first) is
    prefixed with a single space in the output.
    """
    sentences = splitter.split(text=Input_txt)
    # FIX: str.join instead of quadratic `+=` string concatenation;
    # output is byte-identical to the original loop.
    return ''.join(' ' + get_response(sentence) for sentence in sentences)


iface = gr.Interface(fn=get_fun, inputs="text", outputs="text",
                     title="Ai Re-Phraser")
iface.launch(inline=False)

"""# New Section"""