# Gradio app: generate Khmer math lesson plans with a text-generation model
# and append every generated lesson to a Hugging Face dataset repo.
import os
import json

import gradio as gr
import spaces
import torch
from transformers import pipeline, AutoTokenizer
from huggingface_hub import upload_file

# --- Constants ---
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_REPO = "Pisethan/khmer-lesson-dataset-generated"
LOCAL_JSONL = "generated_lessons.jsonl"

# --- Options ---
grade_options = ["1", "2", "3", "4", "5", "6"]
topic_options = ["Addition", "Subtraction", "Counting", "Number Recognition", "Multiplication", "Division"]
level_options = ["Beginner", "Intermediate", "Advanced"]

# --- Tokenizer (global) ---
tokenizer = AutoTokenizer.from_pretrained("Pisethan/khmer-lesson-model", token=HF_TOKEN)


# --- Helper to save and upload ---
def save_to_jsonl(record):
    """Append one record to the local JSONL file and push the file to the dataset repo."""
    with open(LOCAL_JSONL, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
    upload_file(
        path_or_fileobj=LOCAL_JSONL,
        path_in_repo="generated_lessons.jsonl",
        repo_id=DATASET_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )


# --- Generation for one lesson ---
@spaces.GPU
def generate_lesson(grade, topic, level):
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=device,
        token=HF_TOKEN,
    )

    prompt = f"""
You are a lesson planning assistant.
Return only one structured Khmer math lesson plan with these fields:

Lesson Title:
Objective:
Activity:
Instruction (Khmer):
Materials:

Please follow the structure exactly.

Grade: {grade}
Topic: {topic}
TaRL Level: {level}
"""

    output = pipe(
        prompt,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
    )
    # The text-generation pipeline returns the prompt followed by the continuation.
    result = output[0]["generated_text"]

    # Save to dataset
    record = {
        "grade": grade,
        "topic": topic,
        "level": level,
        "prompt": prompt.strip(),
        "completion": result.strip(),
    }
    save_to_jsonl(record)

    return result


# --- Generation for all combinations ---
@spaces.GPU
def generate_all_lessons():
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=device,
        token=HF_TOKEN,
    )

    results = ""
    for grade in grade_options:
        for topic in topic_options:
            for level in level_options:
                prompt = f"""Generate a Khmer math lesson plan.
Grade: {grade} Topic: {topic} TaRL Level: {level}"""
                output = pipe(prompt, max_new_tokens=200, temperature=0.7, do_sample=True)
                result = output[0]["generated_text"]

                record = {
                    "grade": grade,
                    "topic": topic,
                    "level": level,
                    "prompt": prompt.strip(),
                    "completion": result.strip(),
                }
                save_to_jsonl(record)

                # "ថ្នាក់" = "Grade"
                results += f"🔹 ថ្នាក់ {grade} | {topic} | {level}\n{result}\n\n{'-'*50}\n\n"

    return results


# --- UI ---
with gr.Blocks() as demo:
    # "Math lesson generation assistant"
    gr.Markdown("## 🤖 អ្នកជំនួយបង្កើតមេរៀនគណិតវិទ្យា")
    # "Select a grade, topic, and student level, then click Generate Lesson.
    #  Or click the button below to generate all lessons."
    gr.Markdown("ជ្រើសរើសថ្នាក់ ប្រធានបទ និងកម្រិតសិស្ស រួចចុចបង្កើតមេរៀន។ ឬចុចប៊ូតុងខាងក្រោមសម្រាប់បង្កើតមេរៀនទាំងអស់។")

    with gr.Row():
        grade = gr.Dropdown(choices=grade_options, label="ថ្នាក់ (Grade)", value="1")
        topic = gr.Dropdown(choices=topic_options, label="ប្រធានបទ (Topic)", value="Addition")
        level = gr.Dropdown(choices=level_options, label="កម្រិតសិស្ស (TaRL Level)", value="Beginner")

    output_box = gr.Textbox(
        label="📘 Khmer Lesson Plan",
        lines=20,
        max_lines=200,
        show_copy_button=True,
        autoscroll=True,
    )

    with gr.Row():
        gen_btn = gr.Button("✅ បង្កើតមេរៀន")            # "Generate lesson"
        gen_all_btn = gr.Button("🧠 បង្កើតមេរៀនទាំងអស់")  # "Generate all lessons"
        clear_btn = gr.Button("🧹 សម្អាត")               # "Clear"

    gen_btn.click(fn=generate_lesson, inputs=[grade, topic, level], outputs=output_box)
    gen_all_btn.click(fn=generate_all_lessons, outputs=output_box)
    clear_btn.click(fn=lambda: "", outputs=output_box)

demo.queue()
demo.launch()
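
# --- Example (illustrative only, not executed by the app) ---
# A minimal sketch of how the records written by save_to_jsonl() could be read
# back from the dataset repo for review or further fine-tuning. It assumes the
# "datasets" library is installed, which the app above does not itself require.
#
# from datasets import load_dataset
# ds = load_dataset(DATASET_REPO, data_files="generated_lessons.jsonl", split="train", token=HF_TOKEN)
# print(ds[0]["prompt"])
# print(ds[0]["completion"])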