import gradio as gr
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
from huggingface_hub import from_pretrained_keras, hf_hub_download

# Convert a sequence of predicted token ids back to a string using the
# tokenizer's reverse word index; ids not in the index (e.g. padding) map to ''.
def sequence_to_text(sequence, tokenizer):
    reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))
    text = ''.join([reverse_word_map.get(i, '') for i in sequence])
    return text
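
# Illustrative sanity check (not part of the app): for a character-level Keras
# Tokenizer, sequence_to_text should invert texts_to_sequences. The tokenizer
# below is a hypothetical stand-in, not the one downloaded from the Hub.
#   demo_tok = tf.keras.preprocessing.text.Tokenizer(char_level=True)
#   demo_tok.fit_on_texts(["ajith"])
#   seq = demo_tok.texts_to_sequences(["ajith"])[0]
#   assert sequence_to_text(seq, demo_tok) == "ajith"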

# Load the model from the Hugging Face Hub repository
model = from_pretrained_keras("Bajiyo/Malayalam_transliteration")
# Load tokenizers
repo_id = "Bajiyo/Malayalam_transliteration"
source_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="source_tokenizer.json")
target_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="target_tokenizer.json")
# tokenizer_from_json expects a JSON string; handle tokenizer files saved either
# as a raw JSON string or as an already-parsed JSON object.
def load_tokenizer(path):
    with open(path) as f:
        data = json.load(f)
    if not isinstance(data, str):
        data = json.dumps(data)
    return tf.keras.preprocessing.text.tokenizer_from_json(data)

source_tokenizer = load_tokenizer(source_tokenizer_path)
target_tokenizer = load_tokenizer(target_tokenizer_path)
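
# For reference, a tokenizer file compatible with the loader above can be
# produced like this (hypothetical; the repo's files may have been saved
# with a different convention):
#   with open("source_tokenizer.json", "w") as f:
#       f.write(source_tokenizer.to_json())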

max_seq_length = 100  # Maximum padded sequence length (should match the value used in training)

def transliterate(input_text):
    # Tokenize the input and pad it to the fixed sequence length.
    input_sequence = source_tokenizer.texts_to_sequences([input_text])
    input_padded = pad_sequences(input_sequence, maxlen=max_seq_length, padding='post')
    # Greedy decoding: pick the most likely output token at each position.
    prediction = model.predict(input_padded)
    predicted_sequence = np.argmax(prediction, axis=-1)[0]
    predicted_text = sequence_to_text(predicted_sequence, target_tokenizer)
    return predicted_text
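
# Example call (illustrative; the exact output depends on the trained model):
#   transliterate("അജിത്")  # expected to return something like "ajith"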

# Set up the Gradio interface (gr.Textbox replaces the deprecated gr.inputs.Textbox).
iface = gr.Interface(
    fn=transliterate,
    inputs=gr.Textbox(lines=2, placeholder="Enter Malayalam text here..."),
    outputs="text",
    title="Malayalam to English Transliteration",
    description="Enter Malayalam names to get their English transliterations.",
)

if __name__ == "__main__":
    iface.launch()