import gradio as gr
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = os.environ.get("HF_MODEL_ID", "teamaMohamed115/smollm-360m-code-lora")
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading tokenizer and model from {MODEL_ID} on {DEVICE}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# Safe loader: try trust_remote_code first (some repos ship custom modeling
# code), then fall back to a plain load if that fails.
try:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
except Exception:
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model.to(DEVICE)
model.eval()
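
# Assumption (not in the original app): small causal-LM tokenizers often lack
# a dedicated pad token; reusing EOS for padding gives generate() a valid
# pad_token_id and silences the usual warning.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token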

# Generation helper
GEN_KWARGS = dict(
    max_new_tokens=256,
    do_sample=True,
    temperature=0.2,
    top_p=0.95,
    top_k=50,
    num_return_sequences=1,
)
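# The defaults above favor near-deterministic code output: a low temperature
# plus nucleus (top-p) and top-k truncation trims the long tail of unlikely tokens.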

PROMPT_TEMPLATE = (
    "# Instruction:\n{instruction}\n\n# Response (provide a Python module with multiple functions):\n"
)
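# The template above is assumed to mirror the instruction/response format used
# when the LoRA adapter was fine-tuned; a mismatched template usually degrades
# output quality.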

def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.2, top_p: float = 0.95):
    if not instruction.strip():
        return "Please provide an instruction or problem statement."
    prompt = PROMPT_TEMPLATE.format(instruction=instruction.strip())
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(DEVICE)
    gen_kwargs = GEN_KWARGS.copy()
    gen_kwargs.update({
        "max_new_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
    })
    # temperature <= 0 is invalid when do_sample=True (the UI slider allows 0.0);
    # fall back to greedy decoding and drop the now-unused sampling knobs.
    if gen_kwargs["temperature"] <= 0:
        gen_kwargs["do_sample"] = False
        for key in ("temperature", "top_p", "top_k"):
            gen_kwargs.pop(key, None)
    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Strip the prompt prefix from the decoded text if present.
    if decoded.startswith(prompt):
        decoded = decoded[len(prompt):]
    return decoded.strip()
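
# Quick sanity check when running outside Gradio (hypothetical example call):
#   generate_code("Write a module with a function that reverses a string.")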

with gr.Blocks(title="SmolLM Python Code Assistant") as demo:
    gr.Markdown("# SmolLM — Python Code Generation\nEnter an instruction and get a multi-function Python module.")
    with gr.Row():
        instr = gr.Textbox(lines=6, placeholder="Describe the Python module you want...", label="Instruction")
        with gr.Column(scale=1):
            max_t = gr.Slider(minimum=32, maximum=1024, value=256, step=32, label="Max new tokens")
            temp = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Temperature")
            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top-p")
    run_btn = gr.Button("Generate")
    output = gr.Code(label="Generated Python module", language="python")

    def run(instruction, max_tokens, temperature, top_p):
        try:
            return generate_code(instruction, max_tokens, temperature, top_p)
        except Exception as e:
            return f"Error during generation: {e}"

    run_btn.click(run, inputs=[instr, max_t, temp, top_p], outputs=[output])

    gr.Examples(examples=[
        "Implement a Python module that includes: a function to compute Fibonacci sequence, a function to check primality, and a function to compute factorial, all with type hints and docstrings.",
        "Create a Python module for basic matrix operations (add, multiply, transpose) with appropriate error handling and tests.",
    ], inputs=instr)

if __name__ == "__main__":
    demo.launch()
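
# Assumed Space dependencies (a minimal requirements.txt sketch, not taken from
# the original repo): gradio, torch, transformers.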