Spaces:

AkashDataScience
/

languageBPE

Sleeping

File size: 1,019 Bytes

41dfb3a

import torch
import gradio as gr
from language_bpe import BPETokenizer

# Location of the serialized tokenizer model, relative to the working directory.
# NOTE(review): the file name suggests an English-only model, while the UI text
# below advertises Hindi + English — confirm this is the intended model.
MODEL_PATH = 'models/english_5000.model'

# Build the tokenizer once at import time so every request reuses the same
# instance instead of re-reading the model file per call.
tokenizer = BPETokenizer()
tokenizer.load(MODEL_PATH)

def inference(input_text):
    """Tokenize ``input_text`` with the module-level ``tokenizer``.

    Args:
        input_text: The text entered by the user in the Gradio input box.

    Returns:
        Whatever ``tokenizer.encode_ordinary`` returns for ``input_text``
        (presumably a sequence of integer token ids — TODO confirm against
        ``language_bpe.BPETokenizer``).
    """
    return tokenizer.encode_ordinary(input_text)

# ---------------------------------------------------------------------------
# Gradio UI definition
# ---------------------------------------------------------------------------

# Heading and blurb passed to the interface.
# NOTE(review): "build" in the title probably should read "built"; left as-is
# because it is user-facing text.
title = "A bilingual tokenizer build using opus and wikipedia data"
description = "A simple Gradio interface to see tokenization of Hindi and English(Hinglish) text"

# Sample inputs offered to the user. Each inner list carries one value per
# input component; there is a single input, so each inner list has one item.
examples = [
    ["He walked into the basement with the horror movie from the night before playing in his head."],
    ["Henry couldn't decide if he was an auto mechanic or a priest."],
    ["Poison ivy grew through the fence they said was impenetrable."],
]

# The single text input and single text output wired to `inference`.
# NOTE(review): "both language" in the label reads like a typo for
# "both languages"; left unchanged because it is user-facing text.
_input_box = gr.Textbox(label="Enter any sentence in Hindi, English or both language", type="text")
_output_box = gr.Textbox(label="Output", type="text")

demo = gr.Interface(
    fn=inference,
    inputs=[_input_box],
    outputs=[_output_box],
    title=title,
    description=description,
    examples=examples,
)

# Launch is called unconditionally (no `__main__` guard), so it runs whenever
# this module is executed or imported.
demo.launch()