Spaces:
Sleeping
Sleeping
File size: 1,289 Bytes
f0556d1 254cbbb f0556d1 254cbbb f0556d1 254cbbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import gradio as gr
import os
from tokenizer.basic_bpe import BasicTokenizer
# Load the trained BPE tokenizer model from disk at import time so the
# Gradio callback below can use it.
# NOTE(review): the path is resolved against the current working directory,
# so the script must be launched from the project root — confirm this is
# intended (a __file__-relative path would be more robust).
print("Loading the model...")
# os.path.join is variadic: one call replaces the original chained joins.
model_path = os.path.join(
    os.getcwd(), "tokenizer_model", "hindi_sentiments_basic.model"
)
basic_tokenizer = BasicTokenizer()
basic_tokenizer.load(model_path)
def test_tokenizer(text):
    """Tokenize *text* with the loaded BPE model and report the results.

    Returns a tuple of:
      - the list of token ids produced by encoding *text*,
      - the text recovered by decoding those ids (round trip),
      - a list of (token-id-as-string, decoded-single-token) pairs.
    """
    token_ids = basic_tokenizer.encode(text)
    round_trip = basic_tokenizer.decode(token_ids)
    id_to_piece = []
    for token_id in token_ids:
        piece = basic_tokenizer.decode([token_id])
        id_to_piece.append((str(token_id), piece))
    return token_ids, round_trip, id_to_piece
# Build the Gradio UI: a text input and button on the left, the three
# tokenizer outputs on the right, wired together through test_tokenizer.
with gr.Blocks() as demo:
    gr.HTML("<h1 align = 'center'> Token Generation for Hindi Dataset </h1>")
    with gr.Row():
        with gr.Column():
            inputs = [
                gr.TextArea(
                    label="Enter initial text to generate tokens in Hindi", lines=10
                )
            ]
            generate_btn = gr.Button(value="Generate Text")
        with gr.Column():
            enc = gr.Textbox(label="Encoded Tokens")
            txt = gr.Textbox(label="Decoded Text from tokens")
            # Renamed from `map` so the builtin is not shadowed.
            token_map = gr.Textbox(label="Mapping of the tokens and respective texts")
            # Order matches test_tokenizer's return tuple: (ids, decoded, mapping).
            outputs = [enc, txt, token_map]
    generate_btn.click(fn=test_tokenizer, inputs=inputs, outputs=outputs)
if __name__ == "__main__":
    # share=True asks Gradio to open a public tunnel URL in addition to
    # the local server — NOTE(review): confirm a public link is intended.
    demo.launch(share=True)
|