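# Web-page residue kept for provenance (apparently avatar caption, last commit message, commit hash):
# chrisjay's picture
# update based on revised joeynmt
# e26a582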
import logging

import gradio as gr
import yaml
from huggingface_hub import hf_hub_download
from joeynmt.prediction import load_params_for_prediction, translate_for_hf_space
# Display name of every language offered in the UI -> the language code used
# to build "<source>-<target>" model pair identifiers.
# NOTE(review): the key mapped to 'ish' appears to start with a stray
# combining accent; it is left untouched because it is the label shown to
# users -- confirm the intended spelling.
language_map = {
    'English': 'en',
    'Swahili': 'sw',
    'Fon': 'fon',
    'Igbo': 'ig',
    'Arabic': 'ar',
    'Shona': 'sn',
    'Ẹ̀dó': 'bin',
    'Hausa': 'ha',
    'Efik': 'efi',
    'Twi': 'twi',
    'Afrikaans': 'af',
    'Yoruba': 'yo',
    'Urhobo': 'urh',
    'Dendi': 'ddn',
    '̀Ẹ̀sán': 'ish',
    'Isoko': 'iso',
    'Kamba': 'kam',
    'Luo': 'luo',
    'Southern Ndebele': 'nr',
    'Tshivenda': 've',
}

# Language pairs ("<source code>-<target code>") the author has worked on.
available_language_pairs = [
    'en-sw', 'en-af', 'en-ar', 'en-ddn', 'en-ish',
    'en-iso', 'en-kam', 'en-luo', 'en-nr', 'en-ve',
    'efi-en', 'en-bin', 'en-ha', 'en-ig', 'en-fon',
    'en-twi', 'sn-en', 'sw-en', 'yo-en', 'en-urh',
]

# Display names in the insertion order of language_map.
available_languages = list(language_map)
def load_config(path="configs/default.yaml") -> dict:
    """
    Read a YAML configuration file and hand back its parsed contents.

    CODE ADAPTED FROM: https://github.com/joeynmt/joeynmt

    :param path: path to YAML configuration file
    :return: configuration dictionary
    """
    # Read mode is open()'s default; the encoding is pinned to UTF-8.
    with open(path, encoding="utf-8") as config_stream:
        return yaml.safe_load(config_stream)
def load_model(source_language,target_language):
    """
    Download one translation model's files and build its prediction parameters.

    The config, both vocabularies and the best checkpoint are fetched from the
    ``chrisjay/masakhane_benchmarks`` Hub repo, under
    ``<source_language>-<target_language>/main/``. The downloaded vocabulary
    paths replace ``data.src_vocab`` / ``data.trg_vocab`` in the parsed config.

    :param source_language: source language code (e.g. ``en``), not the display name
    :param target_language: target language code (e.g. ``sw``), not the display name
    :return: the result of ``load_params_for_prediction`` for the downloaded
        config and checkpoint
    :raises RuntimeError: if any model file cannot be downloaded; the
        underlying error is chained as ``__cause__``
    """
    repo_id = "chrisjay/masakhane_benchmarks"
    translation_dir = 'main'
    model_dir = f"{source_language}-{target_language}/{translation_dir}"

    try:
        # NOTE: the former ``force_filename='config.yaml'`` argument was dropped.
        # It is deprecated in huggingface_hub and requested the same cache file
        # name for every language pair, so pairs could end up sharing one
        # cached config.
        file_yaml, src_vocab, trg_vocab, best_ckpt = (
            hf_hub_download(repo_id, filename=f"{model_dir}/{file_name}")
            for file_name in ("config.yaml", "src_vocab.txt", "trg_vocab.txt", "best.ckpt")
        )
    except Exception as err:
        # Chain the original error so network/auth/API problems are not
        # mistaken for a missing repo folder.
        raise RuntimeError(f'It seems we do not have a working configuration repo yet for {source_language} -> {target_language}. \n You could help us by creating it here: https://huggingface.co/chrisjay/masakhane_benchmarks/tree/main') from err

    parsed_yaml_file = load_config(file_yaml)
    # Point the config at the vocab files that were just downloaded.
    parsed_yaml_file['data']['src_vocab'] = src_vocab
    parsed_yaml_file['data']['trg_vocab'] = trg_vocab

    return load_params_for_prediction(parsed_yaml_file, best_ckpt)
# Load the models of all available language pairs at start-up.
model_mapping = {}  # "<source code>-<target code>" -> prediction params
examples_available_models = []  # Keep track of models that loaded successfully and display only them in the Examples.

# Reverse lookup (language code -> display name), built once instead of
# rebuilding key/value lists for every pair. setdefault keeps the first name
# seen for a code, matching the previous list.index() lookup.
code_to_language = {}
for language_name, language_code in language_map.items():
    code_to_language.setdefault(language_code, language_name)

for available_pair in available_language_pairs:
    source_code, _, target_code = available_pair.partition('-')
    try:
        model_mapping[available_pair] = load_model(source_code, target_code)
        examples_available_models.append(
            [code_to_language[source_code], code_to_language[target_code]]
        )
    except Exception as err:
        # Best effort: one broken pair must not stop the app from starting,
        # but the reason is logged instead of being silently discarded.
        logging.getLogger(__name__).warning(
            "Skipping language pair %s: %r", available_pair, err
        )
        continue

if not examples_available_models:
    raise RuntimeError('Available models for Space cannot be empty!')
def get_translation(source_language,target_language,source_sentence=None,source_file=None):
    """
    Translate a sentence, or an uploaded file, between two languages.

    When ``source_file`` is given it takes precedence over ``source_sentence``
    and its ``.name`` (path) is passed to the translator in ``'file'`` mode.

    :param source_language: display name of the source language (a key of ``language_map``)
    :param target_language: display name of the target language (a key of ``language_map``)
    :param source_sentence: text to translate; ignored when ``source_file`` is given
    :param source_file: optional uploaded file object exposing a ``.name`` path
    :return: the first prediction in sentence mode, the full prediction result
        in file mode, or a human-readable message when there is nothing to
        translate or the translation fails
    :raises KeyError: if either language name is not in ``language_map``
    """
    source_language_ = language_map[source_language]
    target_language_ = language_map[target_language]

    if source_file is not None:
        translation_type = 'file'
        source = source_file.name
    else:
        translation_type = 'sentence'
        source = source_sentence
        # Nothing to translate: say so directly instead of letting the
        # translator fail and reporting a misleading model-loading error.
        if source is None or not str(source).strip():
            return 'Please enter a sentence to translate or upload a text file.'

    try:
        params = model_mapping[f'{source_language_}-{target_language_}']
        pred = translate_for_hf_space(params,source,translation_type)
    except Exception:
        # Keep the friendly message for the UI, but record the real cause
        # (missing pair or translation failure) in the logs.
        logging.getLogger(__name__).exception(
            "Translation failed for %s -> %s", source_language_, target_language_
        )
        return f'There was an issue loading the translation model for {source_language} -> {target_language}. Try another pair please'

    return pred if translation_type == 'file' else pred[0]
# Text shown at the top of the web page.
title = "Interact with Masakhane Benchmark Models"
description = "This enables you to interact with some of the Masakhane Benchmark Models and keep up with their improvement. Some of these models undergo finetuning on a regular basis. This way, you can easily use the best model with no hassles."

# Input widgets, listed in the positional order of get_translation's
# parameters: source language, target language, sentence, optional file.
interface_inputs = [
    gr.inputs.Dropdown(choices=available_languages, default='English'),
    gr.inputs.Dropdown(choices=available_languages, default='Swahili'),
    gr.inputs.Textbox(label="Input"),
    gr.inputs.File(
        file_count="single",
        type="file",
        label='Or upload txt file containing sentences',
        optional=True,
    ),
]
translation_output = gr.outputs.Textbox(type="auto", label='Translation')

# Only the pairs whose models loaded are offered as clickable examples.
iface = gr.Interface(
    fn=get_translation,
    inputs=interface_inputs,
    outputs=translation_output,
    title=title,
    description=description,
    examples=examples_available_models,
    enable_queue=True,
    theme='huggingface',
)
iface.launch()