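# NOTE(review): the lines below appear to be file-viewer page residue
# (author / commit / size), not program text — kept here as comments.
# bkhmsi's picture
# initialized repo
# d36d50b
# raw
# history blame
# No virus
# 1.51 kB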
import os
import yaml
import gdown
import gradio as gr
from predict import PredictTri
# Fetch the model checkpoint and the vocabulary file from Google Drive,
# skipping any file that already exists at its target path.
for output_path, gdrive_id in (
    ("tashkeela-d2.pt", "1FGelqImFkESbTyRsx_elkKIOZ9VbhRuo"),
    ("vocab.vec", "1-0muGvcSYEf8RAVRcwXay4MRex6kmCii"),
):
    if os.path.exists(output_path):
        continue
    gdown.download(id=gdrive_id, output=output_path, quiet=False)
# Load the YAML configuration from the working directory.
with open("config.yaml", mode="r", encoding="utf-8") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Copy the predictor window into the "train" section: the sentence-length
# limit equals the window and the token-count limit is three times the window.
window = config["predictor"]["window"]
config["train"].update(
    {
        "max-sent-len": window,
        "max-token-count": window * 3,
    }
)
def diacritze(text: str) -> str:
    """Diacritize *text* and return the result as a single string.

    The input is echoed to stdout, then passed to ``PredictTri`` together
    with the module-level ``config``. The lines returned by
    ``predict_majority_vote()`` are joined with newline characters.

    Args:
        text: Text submitted from the input textbox.

    Returns:
        The predicted lines joined by newlines, or an empty string when
        *text* is empty or ``None`` (the predictor is not invoked then).
    """
    print(text)
    if not text:
        # Nothing to diacritize (e.g. the button was clicked with an empty
        # textbox): skip building the predictor and return an empty result.
        return ""
    predictor = PredictTri(config, text)
    diacritized_lines = predictor.predict_majority_vote()
    return "\n".join(diacritized_lines)
# Keyword arguments shared by the input and output textboxes.
# NOTE: the rtl=True and text_align='right' options were left disabled in
# the original layout and remain disabled here.
textbox_opts = {"lines": 5, "type": "text"}

# Page layout: a Markdown header, an input textbox, an output textbox and a
# button that runs diacritze on the input and writes to the output.
with gr.Blocks() as demo:
    gr.Markdown(
        "\n"
        "# Partial Diacritization\n"
        "TODO: put paper links here\n"
    )
    input_txt = gr.Textbox(placeholder="اكتب هنا", label="Input", **textbox_opts)
    output_txt = gr.Textbox(label="Output", **textbox_opts)
    btn = gr.Button(value="Shakkel")
    btn.click(diacritze, inputs=input_txt, outputs=output_txt)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()