| import random |
| import gradio as gr |
| from datasets import load_dataset |
|
|
# Hugging Face Hub repository id and split that back the viewer.
DATASET_NAME = "sotalab/en-vi-ja-300k-triplets"
SPLIT = "train"


# The dataset is loaded once at import time, so every button click served by
# the app reuses the same in-memory object instead of reloading it.
print("Loading dataset...")
dataset = load_dataset(DATASET_NAME, split=SPLIT)
print("Loaded", len(dataset), "samples")
|
|
def sample_example():
    """Return one randomly chosen triplet from the loaded dataset.

    Returns:
        A 3-tuple ``(en, vi, ja)`` holding the values stored under the
        ``"en"``, ``"vi"`` and ``"ja"`` keys of the selected row, in that
        order (matching the order of the output textboxes).
    """
    row = random.choice(dataset)
    return tuple(row[lang] for lang in ("en", "vi", "ja"))
|
|
# Introductory text rendered at the top of the page. Kept flush-left so the
# Markdown is not affected by Python source indentation.
INTRO_MARKDOWN = """
# EN–VI–JA Triplet Dataset Viewer

This Space provides a random sample viewer for the **300K English–Vietnamese–Japanese parallel triplet dataset**.

- Languages: English / Vietnamese / Japanese
- Format: sentence-level aligned triplets
- Source: curated from OPUS parallel corpora
"""


# UI layout: a description, one button, and three side-by-side textboxes that
# receive the (en, vi, ja) tuple returned by sample_example().
with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)

    btn = gr.Button("🔄 Sample random triplet")

    with gr.Row():
        en = gr.Textbox(label="English", lines=4)
        vi = gr.Textbox(label="Vietnamese", lines=4)
        ja = gr.Textbox(label="Japanese", lines=4)

    # Event listeners have to be registered while the Blocks context is open.
    # The handler takes no inputs; its three return values map onto the
    # three textboxes in order.
    btn.click(
        fn=sample_example,
        outputs=[en, vi, ja],
    )


# Only start the web server when the file is executed as a script, so that
# importing this module (e.g. to reuse `demo` or `sample_example`) does not
# launch a blocking server as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|