import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Base Code Llama model. You can change to 13b or 34b if you have the hardware.
model_id = "codellama/CodeLlama-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",   # requires the `accelerate` package
    torch_dtype="auto",
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generate_code(prompt):
    # Sample up to 200 new tokens; temperature=0.7 adds mild randomness.
    output = pipe(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
    return output[0]["generated_text"]

gr.Interface(
    fn=generate_code,
    inputs="text",
    outputs="text",
    title="Code Llama Playground",
).launch()