# -*- coding: utf-8 -*-
"""Ai Re-Phraser.py

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/18bvmXQqMIkk7G0gY_1dUolI08RK6Ajrf
"""

# pip install git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git
# !pip install sentence-splitter

import os
import warnings

import torch
import nltk
import pandas as pd
from parrot import Parrot
from parrot.filters import Adequacy, Fluency, Diversity
from sentence_splitter import SentenceSplitter, split_text_into_sentences
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

warnings.filterwarnings("ignore")

# Parrot's own T5 paraphraser (instantiated after its import; the Pegasus
# pipeline below only uses Parrot's quality filters, not this model).
parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5")

# Sentence splitter used to paraphrase long inputs sentence by sentence.
splitter = SentenceSplitter(language='en')

# Parrot quality filters: keep candidates that preserve meaning (adequacy),
# read well (fluency), and differ most from the input (diversity).
adequacy_score = Adequacy()
fluency_score = Fluency()
diversity_score = Diversity()
device = "cuda:0" if torch.cuda.is_available() else "cpu"
adequacy_threshold = 0.90
fluency_threshold = 0.90
diversity_ranker = "levenshtein"

# Pegasus paraphrase model that generates the candidate rewrites.
model_name = 'tuner007/pegasus_paraphrase'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model_pegasus = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)


def get_max_str(lst):
    """Return the longest string in a list."""
    return max(lst, key=len)


def get_response(input_text, num_return_sequences=10, num_beams=10):
    """Paraphrase a single sentence: generate candidates with Pegasus,
    then pick the best one according to the Parrot filters."""
    batch = tokenizer([input_text], truncation=True, padding='longest',
                      max_length=60, return_tensors='pt').to(torch_device)
    translated = model_pegasus.generate(**batch, max_length=60, num_beams=num_beams,
                                        num_return_sequences=num_return_sequences,
                                        temperature=1.5)
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    try:
        adequacy_filtered_phrases = adequacy_score.filter(input_text, tgt_text,
                                                          adequacy_threshold, device)
        if len(adequacy_filtered_phrases) > 0:
            fluency_filtered_phrases = fluency_score.filter(adequacy_filtered_phrases,
                                                            fluency_threshold, device)
            if len(fluency_filtered_phrases) > 0:
                diversity_scored_phrases = diversity_score.rank(input_text,
                                                                fluency_filtered_phrases,
                                                                diversity_ranker)
                return get_max_str(diversity_scored_phrases)
            else:
                return get_max_str(fluency_filtered_phrases)
        else:
            return get_max_str(adequacy_filtered_phrases)
    except Exception:
        # If any filter fails, fall back to the longest raw Pegasus candidate.
        return get_max_str(tgt_text)


import gradio as gr


def get_fun(txt):
    """Split the input into sentences, paraphrase each one, and rejoin."""
    tokens = splitter.split(text=txt)
    txt_paraphrase = ''
    for phrase in tokens:
        tmp = get_response(phrase, num_return_sequences=10, num_beams=10)
        txt_paraphrase = txt_paraphrase + ' ' + tmp
    return txt_paraphrase


iface = gr.Interface(fn=get_fun, inputs="text", outputs="text",
                     title="Ai Re-Phraser - Quotient Hackathon")
iface.launch(inline=False)

"""# New Section"""
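
# Quick sanity check (illustrative sketch, not part of the original notebook):
# once the models above have loaded, the paraphraser can be exercised directly,
# without launching the Gradio UI. The sample text below is an arbitrary,
# made-up example.
#
#   sample = ("The weather was terrible, so the match was postponed. "
#             "Fans were asked to keep their tickets for the new date.")
#   print(get_fun(sample))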