"""Gradio demo that completes a code prompt with a StarCoder2 checkpoint."""

import os

# NOTE(review): `spaces` is kept ahead of torch/transformers, as in the
# original import order; ZeroGPU appears to expect it to be imported before
# any CUDA-touching library -- confirm before re-sorting these imports.
import spaces
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

title = """S&T ORA"""

# Only the 3b checkpoint is loaded. An earlier assignment of
# "bigcode/starcoder2-15b" was immediately overwritten and has been dropped.
model_path = "bigcode/starcoder2-3b"

# Device used for both the model weights and the encoded prompt.
# Set to "cuda" to run on a GPU.
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
# Keep the weights on the same device the inputs are sent to, so changing
# `device` above cannot leave the model and its inputs on different devices.
model.to(device)


@spaces.GPU
def generate_text(
    prompt: str, temperature: float = 0.9, max_length: int = 200
) -> str:
    """Sample a continuation of *prompt* from the loaded model.

    Args:
        prompt: Source text to continue.
        temperature: Sampling temperature forwarded to ``model.generate``.
        max_length: Forwarded to ``model.generate`` as ``max_length``
            (a limit on the whole sequence, prompt tokens included).

    Returns:
        The decoded first generated sequence, which starts with the prompt.
    """
    # Calling the tokenizer yields both the token ids and a matching
    # attention mask, so the mask does not have to be built by hand.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    outputs = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        # UI sliders / API callers may deliver numbers as int or float;
        # normalise them to the types generate() works with.
        max_length=int(max_length),
        top_p=0.9,
        temperature=float(temperature),
        do_sample=True,
        # Pad with the EOS token id during generation.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0])


def gradio_app() -> None:
    """Build the Blocks UI, wire the button to ``generate_text``, and launch."""
    with gr.Blocks() as demo:
        gr.Markdown(title)
        prompt = gr.Code(
            label="Enter your code prompt",
            value="def prime_number(n_max):",
        )
        with gr.Row():
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                step=0.1,
                value=0.5,
                label="Temperature",
            )
            max_length = gr.Slider(
                minimum=100,
                maximum=1024,
                step=10,
                value=100,
                label="Generate Length",
            )
        generate_btn = gr.Button("Try✨StarCoder")
        output = gr.Code(label="✨StarCoder:", lines=40)

        # Slider values are passed positionally in the order of
        # generate_text's parameters.
        generate_btn.click(
            fn=generate_text,
            inputs=[prompt, temperature, max_length],
            outputs=output,
        )

    demo.launch()


if __name__ == "__main__":
    gradio_app()