# app.py — Replit Code 3B demo (Hugging Face Space)
# Author: shadowBoy14 — "Create app.py", commit ab0d06e (verified)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# 1. Setup for low memory (Free tier friendly)
# NOTE(review): 4-bit quantization via bitsandbytes requires a CUDA GPU at
# load time — on a CPU-only free tier this from_pretrained call will fail;
# confirm the Space actually runs with GPU hardware.
model_id = "replit/replit-code-v1_5-3b"
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
# 2. Load tokenizer and model
# trust_remote_code=True executes the model repo's custom Python code —
# acceptable only because the repo ("replit/replit-code-v1_5-3b") is pinned
# and trusted.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# device_map="auto" lets accelerate place layers on available devices
# (GPU/CPU); downstream code should use model.device rather than assuming
# "cuda".
model = AutoModelForCausalLM.from_pretrained(
model_id,
trust_remote_code=True,
quantization_config=quantization_config,
device_map="auto"
)
def generate(prompt):
    """Generate a code completion for *prompt* and return the decoded text.

    Samples up to 100 new tokens at low temperature (0.2) so completions
    stay close to greedy decoding while avoiding repetition loops.
    """
    # Send the inputs to wherever device_map="auto" actually placed the
    # model — the previous hard-coded .to("cuda") crashed on CPU-only
    # hosts and could mismatch the dispatched device.
    inputs = tokenizer.encode(prompt, return_tensors='pt').to(model.device)
    outputs = model.generate(
        inputs,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.2
    )
    # skip_special_tokens keeps markers like <|endoftext|> out of the UI.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# 3. Create the UI: a single prompt box feeding the generator,
# rendered back as syntax-highlighted code.
prompt_box = gr.Textbox(lines=5, label="Input Code/Prompt")
code_output = gr.Code(label="Generated Code")

demo = gr.Interface(
    fn=generate,
    inputs=prompt_box,
    outputs=code_output,
    title="Replit Code 3B Demo",
)

demo.launch()