import time

import gradio as gr
import torch
from spacy.lang.en import English
from transformers import pipeline, GPT2Tokenizer, OPTForCausalLM

# Use the first GPU if one is available, otherwise fall back to CPU.
device = 0 if torch.cuda.is_available() else -1

# Load the fine-tuned OPT paraphrasing model and its tokenizer.
model = OPTForCausalLM.from_pretrained("pushkarraj/pushkar_OPT_paraphaser")
tokenizer = GPT2Tokenizer.from_pretrained("pushkarraj/pushkar_OPT_paraphaser")

generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

# Lightweight spaCy pipeline used only for rule-based sentence splitting.
nlp = English()
nlp.add_pipe("sentencizer")

def cleaned_para(input_sentence):
    """Paraphrase a single sentence and strip the model's delimiter markers."""
    prompt = "<s>" + input_sentence + "</s>>>>><p>"
    output = generator(
        prompt,
        do_sample=True,
        max_length=len(input_sentence.split(" ")) + 200,
        temperature=0.8,
        repetition_penalty=1.2,
        top_p=0.4,
        top_k=1,
    )
    # The paraphrase is emitted between the '</s>>>>><p>' and '</p>' markers.
    return output[0]["generated_text"].split("</s>>>>><p>")[1].split("</p>")[0]


def sentensizer(raw_text):
    """Split the raw input text into sentences."""
    doc = nlp(raw_text)
    return list(doc.sents)


def paraphraser(text):
    """Paraphrase the input text sentence by sentence and rejoin the results."""
    begin = time.time()
    paraphrased = [cleaned_para(str(sent)) for sent in sentensizer(text)]
    print(f"Paraphrased {len(paraphrased)} sentence(s) in {time.time() - begin:.2f}s")
    return ".".join(paraphrased)

interface = gr.Interface(fn=paraphraser, inputs="text", outputs=["text"],
                         title="Paraphraser", description="A paraphrasing tool")
interface.launch()