# plp / app.py
# Pisethan's picture
# Update app.py
# 8a8c652 verified
import os
import gradio as gr
from transformers import pipeline, AutoTokenizer
import torch
import spaces
import json
from huggingface_hub import HfApi, upload_file
# --- Configuration ---
# Hub access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Dataset repository that receives the generated lessons.
DATASET_REPO = "Pisethan/khmer-lesson-dataset-generated"
# Local JSONL file; one record is appended per generated lesson.
LOCAL_JSONL = "generated_lessons.jsonl"

# --- Dropdown choices (also iterated when generating every combination) ---
grade_options = [str(number) for number in range(1, 7)]
topic_options = [
    "Addition",
    "Subtraction",
    "Counting",
    "Number Recognition",
    "Multiplication",
    "Division",
]
level_options = [
    "Beginner",
    "Intermediate",
    "Advanced",
]

# --- Shared tokenizer, loaded once at import time ---
# NOTE(review): the generation pipelines in this file load
# "Pisethan/khmer-lesson-model-v2" while this tokenizer comes from
# "Pisethan/khmer-lesson-model" - confirm both repos share a vocabulary.
tokenizer = AutoTokenizer.from_pretrained(
    "Pisethan/khmer-lesson-model",
    token=HF_TOKEN,
)
# --- Helper to save and upload ---
def save_to_jsonl(record):
    """Append ``record`` to the local JSONL file, then upload that file.

    Args:
        record: JSON-serializable object; it is written as a single line.

    The record is serialized with ``ensure_ascii=False`` so non-ASCII text is
    stored unescaped. Once the append has completed and the file is closed,
    the whole local file is uploaded to ``DATASET_REPO`` (a dataset repo) as
    ``generated_lessons.jsonl``.
    """
    with open(LOCAL_JSONL, mode="a", encoding="utf-8") as sink:
        sink.write(f"{json.dumps(record, ensure_ascii=False)}\n")

    # NOTE(review): every call re-uploads the complete local file and replaces
    # the repo copy. If the local file does not survive a restart, records
    # uploaded earlier would presumably be overwritten - confirm.
    upload_file(
        token=HF_TOKEN,
        repo_type="dataset",
        repo_id=DATASET_REPO,
        path_in_repo="generated_lessons.jsonl",
        path_or_fileobj=LOCAL_JSONL,
    )
# --- Generation for one lesson ---
@spaces.GPU
def generate_lesson(grade, topic, level):
    """Generate one Khmer math lesson plan and append it to the dataset.

    Args:
        grade: Grade label inserted into the prompt (the UI passes one of
            ``grade_options``).
        topic: Topic label inserted into the prompt.
        level: TaRL level label inserted into the prompt.

    Returns:
        The text generated by the model, without the prompt.

    Side effects:
        Calls ``save_to_jsonl`` with a record holding the inputs, the stripped
        prompt and the stripped completion.
    """
    # Device index 0 when CUDA is available, otherwise -1 (CPU).
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=device,
        token=HF_TOKEN,
    )

    # The prompt starts and ends with a newline, exactly as before; the
    # stripped form is what gets stored in the record.
    prompt = (
        "\n"
        "You are a lesson planning assistant. Return only one structured "
        "Khmer math lesson plan with these fields:\n"
        "Lesson Title:\n"
        "Objective:\n"
        "Activity:\n"
        "Instruction (Khmer):\n"
        "Materials:\n"
        "Please follow the structure exactly.\n"
        f"Grade: {grade}\n"
        f"Topic: {topic}\n"
        f"TaRL Level: {level}\n"
    )

    # return_full_text=False: by default the pipeline prepends the prompt to
    # "generated_text", which echoed the instructions into the lesson box and
    # duplicated the prompt inside the record's "completion" field.
    output = pipe(
        prompt,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        return_full_text=False,
    )
    result = output[0]["generated_text"]

    # Save to dataset
    record = {
        "grade": grade,
        "topic": topic,
        "level": level,
        "prompt": prompt.strip(),
        "completion": result.strip(),
    }
    save_to_jsonl(record)
    return result
# --- Generation for all combinations ---
@spaces.GPU
def generate_all_lessons():
    """Generate a lesson for every grade/topic/level combination.

    Iterates ``grade_options`` x ``topic_options`` x ``level_options`` (grade
    varies slowest, level fastest), generates one lesson per combination and
    passes each record to ``save_to_jsonl``.

    Returns:
        One string containing every lesson, each preceded by a header line
        ("🔹 ថ្នាក់ <grade> | <topic> | <level>"; "ថ្នាក់" is Khmer for
        "grade") and followed by a 50-dash separator.
    """
    # Local import keeps this block self-contained.
    from itertools import product

    # Device index 0 when CUDA is available, otherwise -1 (CPU).
    device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline(
        "text-generation",
        model="Pisethan/khmer-lesson-model-v2",
        tokenizer=tokenizer,
        device=device,
        token=HF_TOKEN,
    )

    separator = "-" * 50
    # Collect sections in a list and join once instead of growing a string
    # with += on every iteration.
    sections = []
    for grade, topic, level in product(grade_options, topic_options, level_options):
        prompt = (
            "Generate a Khmer math lesson plan.\n"
            f"Grade: {grade}\n"
            f"Topic: {topic}\n"
            f"TaRL Level: {level}"
        )
        # return_full_text=False: by default the pipeline prepends the prompt
        # to "generated_text", which repeated the prompt under each header and
        # inside the record's "completion" field.
        output = pipe(
            prompt,
            max_new_tokens=200,
            temperature=0.7,
            do_sample=True,
            return_full_text=False,
        )
        result = output[0]["generated_text"]

        record = {
            "grade": grade,
            "topic": topic,
            "level": level,
            "prompt": prompt.strip(),
            "completion": result.strip(),
        }
        save_to_jsonl(record)

        sections.append(
            f"🔹 ថ្នាក់ {grade} | {topic} | {level}\n{result}\n\n{separator}\n\n"
        )
    return "".join(sections)
# --- UI ---
# Gradio front end: three dropdowns, one output box and three buttons.
# The Khmer labels are stored as real UTF-8 text; English glosses are given
# in the comments next to them.
with gr.Blocks() as demo:
    # Title: "Math lesson creation assistant".
    gr.Markdown("## 🤖 អ្នកជំនួយបង្កើតមេរៀនគណិតវិទ្យា")
    # Instructions: "Choose the grade, topic and student level, then press
    # create lesson. Or press the button below to create all lessons."
    gr.Markdown(
        "ជ្រើសរើសថ្នាក់ ប្រធានបទ និងកម្រិតសិស្ស រួចចុចបង្កើតមេរៀន។ "
        "ឬចុចប៊ូតុងខាងក្រោមសម្រាប់បង្កើតមេរៀនទាំងអស់។"
    )

    # Inputs for single-lesson generation.
    with gr.Row():
        grade = gr.Dropdown(choices=grade_options, label="ថ្នាក់ (Grade)", value="1")
        topic = gr.Dropdown(choices=topic_options, label="ប្រធានបទ (Topic)", value="Addition")
        level = gr.Dropdown(choices=level_options, label="កម្រិតសិស្ស (TaRL Level)", value="Beginner")

    # Shared output area used by all three buttons.
    output_box = gr.Textbox(
        label="📘 Khmer Lesson Plan",
        lines=20,
        max_lines=200,
        show_copy_button=True,
        autoscroll=True,
    )

    with gr.Row():
        gen_btn = gr.Button("✅ បង្កើតមេរៀន")  # "Create lesson"
        gen_all_btn = gr.Button("🧠 បង្កើតមេរៀនទាំងអស់")  # "Create all lessons"
        clear_btn = gr.Button("🧹 សម្អាត")  # "Clear"

    # Event wiring: each handler writes its return value into output_box.
    gen_btn.click(fn=generate_lesson, inputs=[grade, topic, level], outputs=output_box)
    gen_all_btn.click(fn=generate_all_lessons, outputs=output_box)
    clear_btn.click(fn=lambda: "", outputs=output_box)

demo.queue()
demo.launch()