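"""Gradio paraphrasing demo.

Splits the input text into sentences with spaCy's rule-based sentencizer and
rewrites each sentence with the "pushkarraj/opt_paraphraser" OPT checkpoint
loaded below, then joins the paraphrased sentences back together.
"""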
import gradio as gr
import pandas as pd
import os
import time

import torch
from spacy.lang.en import English
from transformers import pipeline, GPT2Tokenizer, OPTForCausalLM

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Paraphrasing model (an OPT causal LM) with the matching OPT tokenizer.
model = OPTForCausalLM.from_pretrained("pushkarraj/opt_paraphraser")
tokenizer = GPT2Tokenizer.from_pretrained("facebook/opt-1.3b", truncation=True)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
def cleaned_para(input_sentence):
    # Return only the paraphrase generated between "</s>>>>><p>" and "</p>".
    p = generator("<s>" + input_sentence + "</s>>>>><p>", do_sample=True,
                  max_length=len(input_sentence.split(" ")) + 200,
                  temperature=0.9, repetition_penalty=1.2)
    return p[0]["generated_text"].split("</s>>>>><p>")[1].split("</p>")[0]
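# Prompt/response layout used by cleaned_para (sentence shown is illustrative):
#   input to the model : "<s>Your sentence here.</s>>>>><p>"
#   expected completion: "...paraphrased sentence...</p>"
# Everything before "</s>>>>><p>" and after "</p>" is discarded.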
def sentensizer(raw_text):
    # Split the raw input into sentences with spaCy's rule-based sentencizer.
    nlp = English()
    nlp.add_pipe("sentencizer")
    doc = nlp(raw_text)
    sentences = [sent.text for sent in doc.sents]
    print(sentences)
    return sentences
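# Illustrative split (rule-based, punctuation driven):
#   sentensizer("This is one sentence. This is another.")
#   -> ["This is one sentence.", "This is another."]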
def paraphraser(text):
    # Paraphrase the input sentence by sentence and log the elapsed time.
    begin = time.time()
    paraphrased = [cleaned_para(sentence) for sentence in sentensizer(text)]
    print(time.time() - begin)
    return " ".join(paraphrased)
interface = gr.Interface(fn=paraphraser, inputs="text", outputs=["text"],
                         title="Paraphraser", description="A paraphrasing tool")
interface.launch()
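# Running this script starts the Gradio server; by default Gradio serves the
# interface locally at http://127.0.0.1:7860.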