# NOTE: "Spaces: Sleeping" was page-banner text captured together with this
# file; it is not part of the program.
import gradio as gr
# --- Minimal Sanskrit lexicon (extend with real data) ---
# Known word forms, written in Devanagari. Stored as a set so that membership
# tests on candidate parts are O(1).
LEXICON = {
    "राम",
    "वन",
    "गच्छति",
    "गुरु",
    "इन्द्र",
    "तत्",
    "अपि",
    "धर्म",
    "क्षेत्र",
    "कुरु",
    "क्षेत्रे",
}
# --- Basic Reverse Sandhi Rules ---
# Each entry is (surface_char, expansions). surface_char is the character found
# at the junction of the joined word. Every expansion is a "LEFT+RIGHT" string:
# LEFT is what the left word ended with before sandhi, RIGHT is what the right
# word began with (RIGHT may be empty).
REVERSE_SANDHI_RULES = [
    ("ा", ["अ+अ"]),  # ā → a + a
    ("े", ["अ+इ", "अ+ई"]),  # e → a+i or a+ī
    ("ो", ["अ+उ", "अ+ऊ"]),  # o → a+u or a+ū
    ("ः", ["ः+"]),  # visarga restoration
]
# Independent vowel letter -> dependent vowel sign (matra) used when that vowel
# follows a consonant. "अ" maps to "" because a bare consonant already carries
# the inherent "a" and takes no sign.
_VOWEL_TO_MATRA = {
    "\u0905": "",        # अ (inherent, no sign)
    "\u0906": "\u093e",  # आ
    "\u0907": "\u093f",  # इ
    "\u0908": "\u0940",  # ई
    "\u0909": "\u0941",  # उ
    "\u090a": "\u0942",  # ऊ
    "\u090b": "\u0943",  # ऋ
    "\u090f": "\u0947",  # ए
    "\u0910": "\u0948",  # ऐ
    "\u0913": "\u094b",  # ओ
    "\u0914": "\u094c",  # औ
}


def _is_devanagari_consonant(ch):
    """Return True if *ch* is a Devanagari consonant letter (U+0915-0939, U+0958-095F)."""
    return "\u0915" <= ch <= "\u0939" or "\u0958" <= ch <= "\u095f"


def _restore_left(stem, ending):
    """Re-attach the pre-sandhi *ending* to *stem* using valid Devanagari spelling.

    A vowel that follows a consonant is written as a dependent sign (or as
    nothing at all for the inherent "a"), never as an independent vowel letter.
    Anything else (e.g. a visarga) is appended unchanged.
    """
    if stem and ending in _VOWEL_TO_MATRA and _is_devanagari_consonant(stem[-1]):
        return stem + _VOWEL_TO_MATRA[ending]
    return stem + ending


def generate_candidates(word, lexicon=None, rules=None):
    """Return the plausible two-part splits of *word*.

    Every split position is tried twice: once as a plain cut, and once with
    each applicable reverse-sandhi rule undoing the merged junction character.
    A split is kept only when both parts are in the lexicon.

    Args:
        word: The joined word (Devanagari string).
        lexicon: Container of known words; defaults to the module-level LEXICON.
        rules: Iterable of ``(surface_char, ["LEFT+RIGHT", ...])`` pairs;
            defaults to the module-level REVERSE_SANDHI_RULES.

    Returns:
        A list of ``(left, right)`` tuples without duplicates, in the order in
        which they were discovered. When nothing matches, the single
        placeholder ``[("No plausible split found", "")]`` is returned.
    """
    if lexicon is None:
        lexicon = LEXICON
    if rules is None:
        rules = REVERSE_SANDHI_RULES

    candidates = []
    for i in range(1, len(word)):
        left, right = word[:i], word[i:]

        # Direct split
        if left in lexicon and right in lexicon:
            candidates.append((left, right))

        # Apply reverse sandhi substitutions
        for ch, expansions in rules:
            if not left.endswith(ch):
                continue
            # Left part with the merged junction character removed.
            stem = left[: len(left) - len(ch)]
            for exp in expansions:
                ending, _, onset = exp.partition("+")
                l_base = _restore_left(stem, ending)
                r_base = onset + right
                if l_base in lexicon and r_base in lexicon:
                    candidates.append((l_base, r_base))

    # Deduplicate while keeping discovery order; a set would make the output
    # order vary between interpreter runs (string hash randomisation).
    candidates = list(dict.fromkeys(candidates))
    return candidates or [("No plausible split found", "")]
def sandhi_splitter(word):
    """Format the candidate splits of *word* for display, one per line.

    Args:
        word: Text entered by the user; surrounding whitespace is ignored.
            ``None`` is treated as empty input.

    Returns:
        A newline-separated string in which each line reads ``"left + right"``.
    """
    candidates = generate_candidates((word or "").strip())
    # Drop empty parts so a (message, "") placeholder pair is shown as the
    # message alone rather than with a dangling " + ".
    formatted = [" + ".join(part for part in pair if part) for pair in candidates]
    return "\n".join(formatted)
# Gradio UI: a heading, one input box, one output box and a button that runs
# sandhi_splitter on the input text.
with gr.Blocks() as demo:
    gr.Markdown("## Sanskrit Sandhi-Splitter (Prototype)")
    gr.Markdown("Enter a Sanskrit compound word (Devanagari) to see possible splits.")
    inp = gr.Textbox(label="Compound Word (e.g. धर्मक्षेत्रे)")
    out = gr.Textbox(label="Candidate Splits")
    btn = gr.Button("Split Sandhi")
    # The click listener is registered inside the Blocks context.
    btn.click(fn=sandhi_splitter, inputs=inp, outputs=out)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()