languageBPE / app.py
AkashDataScience's picture
Testing colors
c8dbcc1
raw
history blame
1.44 kB
import torch
import random
import gradio as gr
from language_bpe import BPETokenizer
# Location of the serialized BPE model, relative to the working directory.
MODEL_PATH = "models/english_5000.model"

# One tokenizer instance is created at import time and shared by every
# request handled by `inference`.
tokenizer = BPETokenizer()
tokenizer.load(MODEL_PATH)
def inference(input_text):
    """Tokenize ``input_text`` and build the values shown in the Gradio UI.

    Args:
        input_text: The sentence entered by the user.

    Returns:
        A 3-tuple ``(token_count, tokenized_sentence, encoding)``:

        * ``token_count`` -- ``len(encoding)``.
        * ``tokenized_sentence`` -- the text of every token, decoded one
          token at a time and joined with ``"|"`` so token boundaries
          (including leading spaces inside a token) stay visible.
        * ``encoding`` -- the value returned by
          ``tokenizer.encode_ordinary`` (presumably a list of token ids).
    """
    encoding = tokenizer.encode_ordinary(input_text)

    # Decode each id on its own so every token becomes a separate piece.
    # NOTE(review): assumes ``tokenizer.decode`` tolerates a single-id list
    # even when that id is only part of a multi-byte character -- confirm.
    pieces = [tokenizer.decode([token_id]) for token_id in encoding]

    # A plain-text delimiter is used instead of ANSI colour escapes: the
    # result is displayed in a text box, where escape sequences would most
    # likely appear as literal characters rather than colours.
    tokenized_sentence = "|".join(pieces)

    return len(encoding), tokenized_sentence, encoding
# Heading and blurb handed to gr.Interface as `title` / `description`.
title = "Bilingual tokenizer"
description = (
    "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"
)

# Sample sentences for the demo. gr.Interface receives one inner list per
# example, each holding the value for the single input component.
_EXAMPLE_SENTENCES = (
    "He walked into the basement with the horror movie from the night before playing in his head.",
    "Henry couldn't decide if he was an auto mechanic or a priest.",
    "Poison ivy grew through the fence they said was impenetrable.",
)
examples = [[sentence] for sentence in _EXAMPLE_SENTENCES]
# Input widget: a single free-text box whose value is passed to `inference`.
_sentence_box = gr.Textbox(
    label="Enter any sentence in Hindi, English or both language",
    type="text",
)

# Output widgets, listed in the same order as the tuple `inference` returns:
# token count, tokenized sentence, raw encoding.
_result_widgets = [
    gr.Label(label="Token count"),
    gr.Textbox(label="Sentence after tokenization", type="text"),
    gr.Textbox(label="Encoding", type="text"),
]

# Wire the handler, widgets and page metadata into one Gradio interface.
demo = gr.Interface(
    fn=inference,
    inputs=[_sentence_box],
    outputs=_result_widgets,
    title=title,
    description=description,
    examples=examples,
)

# Start the web server for the demo.
demo.launch()