# Hugging Face Space metadata (from the Spaces page):
# Spaces: Sleeping
# Author: Viraj Bhanushali
# chore: Add requirements.txt, hindi_sentiments.model, app.py, and utils.py files
# commit: 5e362b7
import gradio as gr | |
import os | |
from tokenizer_bpe import BasicTokenizer | |
print("Loading the model...") | |
model_path = os.path.join(os.getcwd(), "tokenizer_model") | |
model_path = os.path.join(model_path, "hindi_sentiments.model") | |
basic_tokenizer = BasicTokenizer() | |
basic_tokenizer.load(model_path) | |
def test_tokenizer(text):
    """Tokenize *text* with the loaded BPE tokenizer and report the results.

    Returns a 3-tuple:
      - the list of token ids for *text*,
      - the text reconstructed by decoding those ids (round-trip check),
      - a list of (id-as-string, decoded-token-text) pairs, one per token.
    """
    token_ids = basic_tokenizer.encode(text)
    round_trip = basic_tokenizer.decode(token_ids)
    pairs = []
    for token_id in token_ids:
        # Decode each id individually so the UI can show id -> text pieces.
        pairs.append((str(token_id), basic_tokenizer.decode([token_id])))
    return token_ids, round_trip, pairs
# Gradio UI: a text input + button on the left, three read-only outputs on
# the right (encoded ids, round-trip decoded text, id->token mapping).
with gr.Blocks() as demo:
    gr.HTML("<h2 align = 'center'> Token Generation for Hindi Dataset </h2>")
    with gr.Row():
        with gr.Column():
            inputs = [
                gr.TextArea(
                    label="Enter text to generate tokens in Hindi", lines=10
                )
            ]
            generate_btn = gr.Button(value="Generate Text")
        with gr.Column():
            enc = gr.Textbox(label="Encoded Tokens")
            txt = gr.Textbox(label="Decoded Text from tokens")
            # Renamed from `map` to avoid shadowing the builtin map().
            token_map = gr.Textbox(label="Mapping of the tokens and respective texts")
            outputs = [enc, txt, token_map]
    # Wire the button to the tokenizer: one input textbox, three outputs.
    generate_btn.click(fn=test_tokenizer, inputs=inputs, outputs=outputs)
if __name__ == "__main__":
    # share=True asks Gradio for a temporary public URL in addition to the
    # local server.
    demo.launch(share=True)