Spaces:
Runtime error
Runtime error
File size: 5,081 Bytes
5a34301 edac3fb eba4bed edac3fb eba4bed 5a34301 546da88 9c094ba 546da88 0353678 9c094ba 437058b 9c094ba 546da88 9c094ba 546da88 9c094ba 0353678 9c094ba 437058b 9c094ba 546da88 9c094ba 546da88 9c094ba 437058b 9c094ba 990ed62 9c094ba 990ed62 546da88 990ed62 546da88 5a34301 9c094ba 5a34301 9c094ba d8be3b1 9c094ba bf1499f 9c094ba ee1eb9b 8ca86e2 5a34301 87938f5 5a34301 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# versions = {
# 'v1': {
# 'name': "milyiyo/paraphraser-german-mt5-small",
# 'tokenizer': None,
# 'model': None
# },
# 'v2': {
# 'name':"milyiyo/paraphraser-german-mt5-small-v2",
# 'tokenizer': None,
# 'model': None
# },
# }
# versions['v1']['tokenizer'] = AutoTokenizer.from_pretrained(versions['v1']['name'])
# versions['v1']['model'] = AutoModelForSeq2SeqLM.from_pretrained(versions['v1']['name'])
# versions['v2']['tokenizer'] = AutoTokenizer.from_pretrained(versions['v2']['name'])
# versions['v2']['model'] = AutoModelForSeq2SeqLM.from_pretrained(versions['v2']['name'])
# Load the fine-tuned German paraphraser (mT5-small, v2) once at module import
# time; both objects are shared by all generate_* strategies below.
tokenizer = AutoTokenizer.from_pretrained("milyiyo/paraphraser-german-mt5-small-v2")
model = AutoModelForSeq2SeqLM.from_pretrained("milyiyo/paraphraser-german-mt5-small-v2")
def generate_v1(inputs, count):
    """Paraphrase with beam search (10 beams) plus a repetition penalty.

    :param inputs: tokenizer encoding holding "input_ids"
    :param count: number of candidate sentences to return
    :return: list of `count` decoded paraphrases
    """
    generated = model.generate(
        inputs["input_ids"],
        early_stopping=True,
        length_penalty=1.0,
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=10,
        repetition_penalty=3.5,
        num_return_sequences=count,
    )
    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in generated]
def generate_v2(inputs, count):
    """Paraphrase with plain beam search (5 beams, length penalty 2.0).

    NOTE(review): `temperature` has no effect unless `do_sample=True` —
    confirm whether sampling was intended here.

    :param inputs: tokenizer encoding holding "input_ids"
    :param count: number of candidate sentences to return
    :return: list of `count` decoded paraphrases
    """
    generated = model.generate(
        inputs["input_ids"],
        early_stopping=True,
        length_penalty=2.0,
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=5,
        temperature=1.5,
        num_return_sequences=count,
    )
    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in generated]
def generate_v3(inputs, count):
    """Paraphrase with diverse beam search (5 beams in 5 groups).

    The diversity penalty pushes each beam group toward distinct wording.
    NOTE(review): `temperature` has no effect unless `do_sample=True` —
    confirm whether sampling was intended here.

    :param inputs: tokenizer encoding holding "input_ids"
    :param count: number of candidate sentences to return
    :return: list of `count` decoded paraphrases
    """
    generated = model.generate(
        inputs["input_ids"],
        num_beams=5,
        max_length=1024,
        temperature=1.5,
        num_beam_groups=5,
        diversity_penalty=2.0,
        no_repeat_ngram_size=2,
        early_stopping=True,
        length_penalty=2.0,
        num_return_sequences=count,
    )
    return [tokenizer.decode(seq, skip_special_tokens=True) for seq in generated]
def generate_v4(encoding, count):
    """Paraphrase with top-k / top-p (nucleus) sampling.

    Unlike the beam-search variants, this strategy also passes the
    attention mask so any padding in the encoding is ignored.

    Fix: removed leftover debug `print()` calls that dumped the full
    encoding and its tensors to stdout on every request.

    :param encoding: tokenizer output with "input_ids" and "attention_mask"
    :param count: number of sampled candidates to return
    :return: list of `count` decoded paraphrases
    """
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]
    outputs = model.generate(input_ids=input_ids,
                             attention_mask=attention_mask,
                             max_length=512,
                             do_sample=True,
                             top_k=120,
                             top_p=0.95,
                             early_stopping=True,
                             num_return_sequences=count)
    return [tokenizer.decode(output,
                             skip_special_tokens=True,
                             clean_up_tokenization_spaces=True)
            for output in outputs]
def paraphrase(sentence: str, count: str):
    """Run every generation strategy over one input sentence.

    :param sentence: the German sentence to paraphrase
    :param count: requested number of paraphrases (numeric string)
    :return: {'result': []} when the input is blank or count is
        non-positive; otherwise {'result': {strategy_name: [sentences]}}
    """
    requested = int(count)
    # Guard clause: nothing to do for blank input or a non-positive count.
    if requested <= 0 or not sentence.strip():
        return {'result': []}
    sentence_input = sentence
    encoding = tokenizer(f"paraphrase: {sentence_input} </s>", return_tensors="pt")
    strategies = {
        'generate_v1': generate_v1,
        'generate_v2': generate_v2,
        'generate_v3': generate_v3,
        'generate_v4': generate_v4,
    }
    return {'result': {name: strategy(encoding, requested)
                       for name, strategy in strategies.items()}}
def paraphrase_dummy(sentence: str, count: str):
    """No-op stand-in for `paraphrase`: ignores its arguments and always
    returns the same empty payload the real function emits for blank input.
    """
    empty_payload = {'result': []}
    return empty_payload
# Fix: Gradio 3+ removed the `gr.inputs` / `gr.outputs` namespaces, and
# `Number`'s initial value is now `value=` rather than `default=`; the old
# API raises AttributeError/TypeError at startup (the Space's "Runtime
# error"). Components are now referenced at the top level of `gr`.
iface = gr.Interface(
    fn=paraphrase,
    inputs=[
        gr.Textbox(lines=2, placeholder=None, label='Sentence'),
        gr.Number(value=3, label='Paraphrases count'),
    ],
    outputs=[gr.JSON(label=None)],
)
iface.launch()