Spaces:
Runtime error
Runtime error
File size: 4,431 Bytes
5a34301 87938f5 5a34301 546da88 9c094ba 546da88 0353678 9c094ba 437058b 9c094ba 546da88 9c094ba 546da88 9c094ba 0353678 9c094ba 437058b 9c094ba 546da88 9c094ba 546da88 9c094ba 437058b 9c094ba 990ed62 9c094ba 990ed62 546da88 990ed62 546da88 5a34301 9c094ba 5a34301 9c094ba d8be3b1 9c094ba bf1499f 9c094ba ee1eb9b 8ca86e2 5a34301 87938f5 5a34301 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Load the paraphrasing model once at startup (mT5-small fine-tuned for
# German paraphrasing); fetched from the Hugging Face Hub on first run.
tokenizer = AutoTokenizer.from_pretrained("milyiyo/paraphraser-german-mt5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("milyiyo/paraphraser-german-mt5-small")
def generate_v1(inputs, count):
    """Generate paraphrases using beam search with a repetition penalty.

    Args:
        inputs: tokenizer ``BatchEncoding`` holding ``input_ids`` and
            ``attention_mask`` tensors for the prompt.
        count: number of sequences to return (must not exceed
            ``num_beams=10`` or ``generate`` raises).

    Returns:
        list[str]: decoded paraphrases with special tokens stripped.
    """
    model_outputs = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so padding tokens are never attended to.
        attention_mask=inputs["attention_mask"],
        early_stopping=True,
        length_penalty=1.0,
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=10,
        repetition_penalty=3.5,
        num_return_sequences=count,
    )
    # Decode every returned beam into plain text.
    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in model_outputs]
def generate_v2(inputs, count):
    """Generate paraphrases using plain beam search.

    Args:
        inputs: tokenizer ``BatchEncoding`` holding ``input_ids`` and
            ``attention_mask`` tensors for the prompt.
        count: number of sequences to return (must not exceed
            ``num_beams=5`` or ``generate`` raises).

    Returns:
        list[str]: decoded paraphrases with special tokens stripped.
    """
    # NOTE: the original passed temperature=1.5, which is silently ignored
    # by beam search (it only applies when do_sample=True) — dropped here.
    model_outputs = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so padding tokens are never attended to.
        attention_mask=inputs["attention_mask"],
        early_stopping=True,
        length_penalty=2.0,
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=5,
        num_return_sequences=count,
    )
    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in model_outputs]
def generate_v3(inputs, count):
    """Generate paraphrases using diverse beam search (5 groups).

    Args:
        inputs: tokenizer ``BatchEncoding`` holding ``input_ids`` and
            ``attention_mask`` tensors for the prompt.
        count: number of sequences to return (must not exceed
            ``num_beams=5`` or ``generate`` raises).

    Returns:
        list[str]: decoded paraphrases with special tokens stripped.
    """
    # NOTE: the original passed temperature=1.5, which is silently ignored
    # because diverse beam search never samples — dropped here.
    model_outputs = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so padding tokens are never attended to.
        attention_mask=inputs["attention_mask"],
        num_beams=5,
        max_length=1024,
        num_beam_groups=5,       # one beam per group -> maximally diverse
        diversity_penalty=2.0,
        no_repeat_ngram_size=2,
        early_stopping=True,
        length_penalty=2.0,
        num_return_sequences=count,
    )
    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in model_outputs]
def generate_v4(encoding, count):
    """Generate paraphrases by top-k / nucleus sampling.

    Args:
        encoding: tokenizer ``BatchEncoding`` holding ``input_ids`` and
            ``attention_mask`` tensors for the prompt.
        count: number of sampled sequences to return.

    Returns:
        list[str]: decoded paraphrases, special tokens removed and
        tokenization spaces cleaned up.
    """
    # Debug print() calls removed — they dumped raw tensors into the logs.
    outputs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=512,
        do_sample=True,   # sampling: results differ run to run by design
        top_k=120,
        top_p=0.95,
        early_stopping=True,
        num_return_sequences=count,
    )
    return [
        tokenizer.decode(seq, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for seq in outputs
    ]
def paraphrase(sentence: str, count: float):
    """Paraphrase *sentence* with four decoding strategies.

    Args:
        sentence: German sentence to paraphrase.
        count: paraphrases per strategy. Gradio's Number widget delivers
            a float (the original hint said ``str``), truncated to int.

    Returns:
        dict: ``{'result': {strategy_name: [paraphrases...]}}`` on
        success, or ``{'result': []}`` for empty input / non-positive
        count.
    """
    p_count = int(count)
    # Guard clause: nothing to do for blank input or a non-positive count.
    if p_count <= 0 or not sentence.strip():
        return {'result': []}
    # Task prefix for the fine-tuned mT5 model.
    # NOTE(review): the literal " </s>" is tokenized as plain text, not as
    # the EOS token (the tokenizer appends EOS itself) — kept for parity
    # with how the model was queried originally; confirm against training.
    text = f"paraphrase: {sentence} </s>"
    encoding = tokenizer(text, return_tensors="pt")
    return {
        'result': {
            'generate_v1': generate_v1(encoding, p_count),
            'generate_v2': generate_v2(encoding, p_count),
            'generate_v3': generate_v3(encoding, p_count),
            'generate_v4': generate_v4(encoding, p_count),
        }
    }
def paraphrase_dummy(sentence: str, count: str):
    """No-op stand-in for `paraphrase`: ignores its inputs and always
    returns an empty result payload."""
    empty_payload = {'result': []}
    return empty_payload
# Wire the paraphraser into a minimal Gradio UI: a sentence textbox plus a
# count field in, raw JSON out. Uses the legacy gr.inputs/gr.outputs
# namespaces, matching the (pre-3.x) Gradio version this app targets.
# Fix: removed the stray trailing "|" after launch(), which was a syntax
# error in the checked-in file.
iface = gr.Interface(fn=paraphrase,
                     inputs=[
                         gr.inputs.Textbox(lines=2, placeholder=None, label='Sentence'),
                         gr.inputs.Number(default=3, label='Paraphrases count'),
                     ],
                     outputs=[gr.outputs.JSON(label=None)])
iface.launch()