Spaces:
Sleeping
Sleeping
from transformers import pipeline | |
import requests | |
import json | |
import gradio as gr | |
# Tokenizer/labeller model. It is served through the "translation" pipeline
# task, so results are read from the 'translation_text' key of its output.
pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Download the Playaling word lexicon published alongside the model:
# https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json
r = requests.get(
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/playaling_words.json",
    timeout=30,  # never hang app start-up forever on a stalled connection
)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()
# Parse the raw bytes: json.loads detects the UTF encoding itself, so the
# result does not depend on the charset requests guesses from the headers.
data = json.loads(r.content)
# Build the Gradio interface
def predict(input):
    """Tokenize text and render the tokens as a right-to-left HTML snippet.

    The text is run through the module-level ``pipe``. Its
    ``translation_text`` output is split into words on " + " and each word
    into tokens on "+". Tokens found in the module-level ``data`` lexicon are
    rendered as green ``<span>`` elements showing the entry's ``word``, with a
    tooltip made of its ``translation`` and ``features``; any other token is
    rendered as plain text.

    Everything interpolated into the markup is HTML-escaped, so an apostrophe
    in a translation cannot terminate the single-quoted ``title`` attribute
    and model output cannot inject markup.

    Args:
        input: Text to tokenize. The name shadows the builtin but is kept so
            existing callers are unaffected.

    Returns:
        A ``<div>`` HTML string in which words are separated by " | " and the
        tokens of one word are joined by "+".
    """
    import html  # function-scope import keeps this block self-contained

    labelled = pipe(input)[0]['translation_text']

    rendered_words = []
    for word in labelled.split(" + "):
        rendered_tokens = []
        for token in (part.strip() for part in word.split("+")):
            entry = data.get(token)
            if entry is None:
                # Unknown token: show it verbatim, but escaped.
                rendered_tokens.append(html.escape(token))
                continue
            # quote=True also escapes ' and ", which matters inside title='...'.
            tooltip = html.escape(
                f"{entry['translation']}\n{entry['features']}", quote=True
            )
            shown = html.escape(str(entry['word']))
            rendered_tokens.append(
                f"<span style='color: green' title='{tooltip}'>{shown}</span>"
            )
        rendered_words.append("+".join(rendered_tokens))

    return (
        "<div style='direction: rtl; text-align: right; font-size: 20px; "
        "font-family: sans-serif; line-height: 1.5'>"
        + " | ".join(rendered_words)
        + "</div>"
    )
# Expose predict as a textbox-in / HTML-out Gradio app and start serving it.
demo = gr.Interface(
    fn=predict,
    inputs="textbox",
    outputs="html",
    title="Ammiya Tokenizer",
    description="Tokenize Ammiya text and show Playaling words",
)
demo.launch()