guymorlan's picture
Update app.py
6db2364
raw
history blame
1.23 kB
import html
import json

import gradio as gr
import requests
from transformers import pipeline
# Tokenizer/labeller model, loaded once at startup and reused for every request.
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Download the Playaling word lexicon (JSON) that ships with the model repo:
# https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,  # without a timeout a stalled connection blocks startup forever
)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()
# Parse the raw bytes so json detects the UTF encoding itself; r.text relies on
# the charset inferred from the response headers, which can garble non-ASCII text.
data = json.loads(r.content)
# build gradio interface
def predict(input):
    """Tokenize the given text and render the result as an RTL HTML snippet.

    The text is passed through the module-level ``pipe``. Its
    ``translation_text`` output is split into groups on ``" + "`` and each
    group into tokens on ``"+"``. A token that is a key of the module-level
    ``data`` lexicon is rendered as a green ``<span>`` showing the entry's
    ``word``, with its ``translation`` and ``features`` as the tooltip; any
    other token is emitted as plain text.

    Every dynamic value is HTML-escaped before being embedded, so quotes or
    markup characters in the lexicon or in the model output cannot break the
    single-quoted ``title`` attribute or inject markup into the page.

    Args:
        input: Text to tokenize. The name shadows the builtin but is kept
            so existing callers are unaffected.

    Returns:
        An HTML string: one ``<div>`` whose groups are separated by ``" | "``
        and whose tokens within a group are joined by ``"+"``.
    """
    segmented = pipe(input)[0]['translation_text']

    rendered_groups = []
    for group in segmented.split(" + "):
        rendered_tokens = []
        for token in (part.strip() for part in group.strip().split("+")):
            if token in data:
                entry = data[token]
                # The newline separates translation from features in the tooltip.
                tooltip = html.escape(
                    f"{entry['translation']}\n{entry['features']}", quote=True
                )
                word = html.escape(str(entry['word']), quote=True)
                rendered_tokens.append(
                    f"<span style='color: green' title='{tooltip}'>{word}</span>"
                )
            else:
                # Unknown tokens come from model output, so escape them as well.
                rendered_tokens.append(html.escape(token, quote=True))
        rendered_groups.append("+".join(rendered_tokens))

    opening = "<div style='direction: rtl; text-align: right; font-size: 20px; font-family: sans-serif; line-height: 1.5'>"
    return opening + " | ".join(rendered_groups) + "</div>"
# Wire predict() to a text box input and an HTML output, then start the app.
gr.Interface(
    fn=predict,
    inputs="textbox",
    outputs="html",
    title="Ammiya Tokenizer",
    description="Tokenize Ammiya text and show Playaling words",
).launch()