import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(
    "stabilityai/stable-code-3b",
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stable-code-3b",
    trust_remote_code=True,
    torch_dtype="auto",
).to("cuda" if torch.cuda.is_available() else "cpu")  # Move the model to the GPU if one is available

# Define the main function for code generation
def generate_code(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    tokens = model.generate(
        **inputs,
        max_new_tokens=48,
        temperature=0.2,
        do_sample=True,
    )
    generated_code = tokenizer.decode(tokens[0], skip_special_tokens=True)
    return generated_code

# Define the Gradio interface
iface = gr.Interface(
    fn=generate_code,
    inputs=[gr.Textbox(lines=2, placeholder="Enter your Python code prompt")],
    outputs="textbox",
    title="Python Code Completion",
    description="Generate code completions using a large language model.",
)

# Launch the Gradio app
iface.launch()
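
# Note on the output: tokenizer.decode(tokens[0]) returns the prompt followed
# by the completion, because generate() keeps the input ids at the start of the
# sequence for decoder-only models. If you want the UI to show only the newly
# generated text, a minimal sketch (a variant, not part of the original app)
# is to slice off the input ids before decoding:
#
#     def generate_completion_only(prompt):
#         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#         tokens = model.generate(
#             **inputs, max_new_tokens=48, temperature=0.2, do_sample=True
#         )
#         new_tokens = tokens[0][inputs["input_ids"].shape[1]:]
#         return tokenizer.decode(new_tokens, skip_special_tokens=True)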