import gradio as gr
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = os.environ.get("HF_MODEL_ID", "teamaMohamed115/smollm-360m-code-lora")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading tokenizer and model from {MODEL_ID} on {DEVICE}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# Safe loader: try trust_remote_code first (some repos ship custom modeling
# code), then fall back to a plain load if that fails.
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
except Exception:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model.to(DEVICE)
model.eval()
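
# Assumption (not in the original app): small causal-LM tokenizers often lack
# a dedicated pad token; reusing EOS for padding gives generate() a valid
# pad_token_id and silences the usual warning.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token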

# Generation helper
GEN_KWARGS = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    num_return_sequences=1,
)
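# The defaults above favor near-deterministic code output: a low temperature
# plus nucleus (top-p) and top-k truncation trims the long tail of unlikely tokens.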

PROMPT_TEMPLATE = (
    "# Instruction:\n{instruction}\n\n# Response (provide a Python module with multiple functions):\n"
)
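# The template above is assumed to mirror the instruction/response format used
# when the LoRA adapter was fine-tuned; a mismatched template usually degrades
# output quality.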

def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.2, top_p: float = 0.95):
    if not instruction.strip():
        return "Please provide an instruction or problem statement."
    prompt = PROMPT_TEMPLATE.format(instruction=instruction.strip())
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(DEVICE)
    gen_kwargs = GEN_KWARGS.copy()
    gen_kwargs.update({
        "max_new_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
    })
    # temperature <= 0 is invalid when do_sample=True (the UI slider allows 0.0);
    # fall back to greedy decoding and drop the now-unused sampling knobs.
    if gen_kwargs["temperature"] <= 0:
        gen_kwargs["do_sample"] = False
        for key in ("temperature", "top_p", "top_k"):
            gen_kwargs.pop(key, None)
    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Strip the prompt prefix from the decoded text if present.
    if decoded.startswith(prompt):
        decoded = decoded[len(prompt):]
    return decoded.strip()
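
# Quick sanity check when running outside Gradio (hypothetical example call):
#   generate_code("Write a module with a function that reverses a string.")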

with gr.Blocks(title="SmolLM Python Code Assistant") as demo:
    gr.Markdown("# SmolLM — Python Code Generation\nEnter an instruction and get a multi-function Python module.")
    with gr.Row():
        instr = gr.Textbox(lines=6, placeholder="Describe the Python module you want...", label="Instruction")
        with gr.Column(scale=1):
            max_t = gr.Slider(minimum=32, maximum=1024, value=256, step=32, label="Max new tokens")
            temp = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Temperature")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top-p")
    run_btn = gr.Button("Generate")
    output = gr.Code(label="Generated Python module", language="python")

    def run(instruction, max_tokens, temperature, top_p):
        try:
            return generate_code(instruction, max_tokens, temperature, top_p)
        except Exception as e:
            return f"Error during generation: {e}"

    run_btn.click(run, inputs=[instr, max_t, temp, top_p], outputs=[output])

    gr.Examples(examples=[
        "Implement a Python module that includes: a function to compute Fibonacci sequence, a function to check primality, and a function to compute factorial, all with type hints and docstrings.",
        "Create a Python module for basic matrix operations (add, multiply, transpose) with appropriate error handling and tests.",
    ], inputs=instr)

if __name__ == "__main__":
    demo.launch()
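
# Assumed Space dependencies (a minimal requirements.txt sketch, not taken from
# the original repo): gradio, torch, transformers.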