"""Gradio demo: tokenize Ammiya text and show Playaling words."""

import html
import json

import gradio as gr
import requests
from transformers import pipeline

# Raw JSON file mapping token labels to entries that carry a 'word' field.
WORDS_URL = (
    "https://huggingface.co/guymorlan/TokenizerLabeller/raw/main/"
    "playaling_words.json"
)
# Seconds to wait for the download before giving up instead of hanging.
REQUEST_TIMEOUT = 30

pipe = pipeline("translation", "guymorlan/TokenizerLabeller")

# Download the word table once at start-up.  Fail loudly on an HTTP error so
# that an error page is never handed to the JSON parser.
response = requests.get(WORDS_URL, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
data = json.loads(response.text)


def _render_segment(segment):
    """Return one ' + '-delimited segment with known tokens replaced.

    The segment is split on '+'; each stripped piece that is a key of
    ``data`` is replaced by that entry's 'word' value, and any other piece is
    kept (HTML-escaped, because the result is rendered as HTML).
    """
    pieces = (piece.strip() for piece in segment.split("+"))
    return "+".join(
        f"{data[piece]['word']}" if piece in data else html.escape(piece)
        for piece in pieces
    )


# Build the Gradio interface.
def predict(input):  # parameter name kept for backward compatibility
    """Run the pipeline on *input* and format its output for display.

    Args:
        input: Text entered in the Gradio textbox.

    Returns:
        A string in which the pipeline's segments (split on " + ") are joined
        by " | ", and the '+'-separated tokens inside each segment are
        replaced by their 'word' value from ``data`` when present there.
    """
    translation = pipe(input)[0]["translation_text"]
    segments = [segment.strip() for segment in translation.split(" + ")]

    # NOTE(review): in the reviewed source the leading and trailing literals
    # contain only a line break; markup may have been lost - confirm.
    return "\n" + " | ".join(_render_segment(s) for s in segments) + "\n"


gr.Interface(
    predict,
    "textbox",
    "html",
    title="Ammiya Tokenizer",
    description="Tokenize Ammiya text and show Playaling words",
).launch()