File size: 1,981 Bytes
e33fddd 68e305f 5c0d878 68e305f 312bc9a 68e305f 77f42d9 263bcbf 77f42d9 263bcbf 35bb108 68e305f e33fddd 9328f46 0719bc2 9328f46 0719bc2 9328f46 e33fddd 9328f46 77f42d9 263bcbf 6d3ff00 77f42d9 35bb108 263bcbf 35bb108 9328f46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import random
import gradio as gr
from transformers import AutoTokenizer, pipeline, T5ForConditionalGeneration
# Checkpoint name handed to `from_pretrained` below.
# Other checkpoint noted by the author: "versae/modernisa-pre"
model_name = "versae/byt5-base-finetuned-modernisa"

# Loaded once at import time; `modernisa` reuses both objects on every call.
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def modernisa(lines=None, file_obj=None):
    """Run the text2text model over each input line and save the result.

    The input text is read from ``file_obj`` when it is given and its name
    ends in ``.txt``; otherwise the ``lines`` string is used. The text is
    split on newlines, blank lines are dropped, and every remaining line is
    passed to the generation pipeline on its own.

    Args:
        lines: Input text as a single string (may span several lines).
        file_obj: Optional object exposing a ``name`` attribute with the path
            of an uploaded file. Files not ending in ``.txt`` are ignored.

    Returns:
        A ``(html, path)`` tuple. ``html`` is the generated lines joined with
        ``<br/>``; ``path`` is the text file the generated lines were written
        to, one per line (``<input stem>_modernized.txt`` for a valid upload,
        ``modernized.txt`` otherwise).
    """
    is_file_valid = bool(file_obj) and file_obj.name.endswith(".txt")
    if is_file_valid:
        # A valid upload replaces whatever was passed in `lines`.
        with open(file_obj.name, encoding="utf-8") as file:
            lines = file.read()

    generated_lines = []
    if lines:
        non_empty = [line for line in lines.strip().split("\n") if line.strip()]
        # Only build the pipeline when there is something to generate.
        if non_empty:
            text2text_generator = pipeline(
                "text2text-generation", model=model, tokenizer=tokenizer
            )
            for line in non_empty:
                outputs = text2text_generator([line], max_length=150)
                generated_lines.extend(
                    output["generated_text"] for output in outputs
                )
    generated_text = "<br/>".join(generated_lines)

    if is_file_valid:
        output_file = f"{file_obj.name.rsplit('.', 1)[0]}_modernized.txt"
    else:
        output_file = "modernized.txt"
    # Open for writing (the default mode is read-only) and keep the download
    # plain text: newlines instead of the HTML line breaks used for display.
    with open(output_file, "w", encoding="utf-8") as output:
        output.write("\n".join(generated_lines))
    # Return the path, not the file handle, which is closed at this point.
    return generated_text, output_file
# Example inputs; one of them is picked at random as the default textbox content.
samples = [
    "\n".join(
        (
            "Otra vez, Don Iuan, me dad,",
            "y otras mil vezes los braços.",
            "Otra, y otras mil sean lazos",
            "de nuestra antigua amistad.",
            "Como venis?",
            "Yo me siento",
            "tan alegre, tan vfano,",
            "tan venturoso, tan vano,",
            "que no podrà el pensamiento",
            "encareceros jamàs",
            "las venturas que posseo,",
            "porque el pensamiento creo",
        )
    ),
]
# Build the web UI around `modernisa` and start serving it.
# Inputs map positionally to modernisa(lines, file_obj); outputs map to the
# (html, file) pair it returns.
gr.Interface(
    fn=modernisa,
    inputs=[
        gr.inputs.Textbox(
            lines=12,
            label="Enter Spanish Golden Age text",
            default=random.choice(samples),
        ),
        gr.inputs.File(
            file_count="single",
            label="Or upload a plain text file (.txt)",
            type="file",
            optional=True,
        ),
    ],
    outputs=[
        gr.outputs.HTML(label="Modern Spanish"),
        gr.outputs.File(label="Download file"),
    ],
).launch(inline=False)