# starcoder / app.py
# ilmosung's picture
# Update app.py
# e062736 verified
import spaces
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
# Text rendered by gr.Markdown at the top of the demo page.
title = """S&T ORA"""

# Hugging Face Hub checkpoint to serve. "bigcode/starcoder2-15b" used to be
# assigned here and was immediately overwritten by the line below, so only
# the 3b checkpoint was ever loaded; the dead assignment has been dropped.
model_path = "bigcode/starcoder2-3b"

# Loaded once at import time and shared by every generate_text() call.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Device that generate_text() moves its input tensors to.
device = 'cpu' # 'cuda'

# Keep the weights on the same device as the inputs so that switching
# `device` cannot leave the model and its inputs on different devices.
# With 'cpu' this is a no-op.
model.to(device)
@spaces.GPU
def generate_text(prompt, temperature=0.9, max_length=200):
    """Complete a code prompt by sampling from the loaded model.

    Args:
        prompt: Source text to continue.
        temperature: Sampling temperature forwarded to ``model.generate``.
        max_length: Maximum number of tokens to generate after the prompt.

    Returns:
        The prompt followed by its sampled continuation, decoded to text
        with special tokens removed. An empty or ``None`` prompt returns
        an empty string without calling the model.
    """
    # Nothing to condition on; skip tokenization and generation entirely.
    if not prompt:
        return ""

    # Calling the tokenizer returns input_ids together with a matching
    # attention_mask, so the mask does not have to be built by hand.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)

    outputs = model.generate(
        **encoded,
        # Count only newly generated tokens: with a total-length cap, a
        # prompt at or above the cap would leave no room for a continuation.
        # int() guards against a UI widget delivering a float value.
        max_new_tokens=int(max_length),
        top_p=0.9,
        temperature=temperature,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Drop special tokens so they do not leak into the displayed code.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def gradio_app():
    """Assemble the Gradio Blocks interface and launch it."""
    with gr.Blocks() as demo:
        gr.Markdown(title)

        # Prompt editor, pre-filled with a sample function header.
        code_in = gr.Code(
            value="def prime_number(n_max):",
            label="Enter your code prompt",
        )

        # Sampling controls laid out in a single row.
        with gr.Row():
            temp_slider = gr.Slider(
                label="Temperature",
                value=0.5,
                minimum=0.1,
                maximum=1.0,
                step=0.1,
            )
            length_slider = gr.Slider(
                label="Generate Length",
                value=100,
                minimum=100,
                maximum=1024,
                step=10,
            )

        run_button = gr.Button("Try✨StarCoder")
        code_out = gr.Code(label="✨StarCoder:", lines=40)

        # Clicking the button passes the three inputs to generate_text and
        # writes its return value into the output editor.
        run_button.click(
            generate_text,
            [code_in, temp_slider, length_slider],
            code_out,
        )

    demo.launch()
if __name__ == "__main__":
    # Build and serve the UI only when this file is executed as a script.
    gradio_app()