import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import os
# Read the Hugging Face access token from the environment (HF_HOME is the cache
# directory, not the token; HF_TOKEN is the standard variable for gated models)
hf_token = os.environ.get("HF_TOKEN", None)
# Load the model and tokenizer
model_name = "meta-llama/CodeLlama-7b-Python-hf"
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token, torch_dtype="float16", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
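# Note: device_map="auto" requires the accelerate package to be installed, and
# the token is only needed because the CodeLlama checkpoints are gated on the Hub.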
def generate_code(prompt):
    # Tokenize the prompt and move it to the model's device; padding is not
    # needed for a single prompt (the LLaMA tokenizer has no pad token by default)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
    # Generate one completion; max_new_tokens bounds the length of the generated code
    # (the original requested 10 sequences but only ever decoded the first)
    outputs = model.generate(**inputs, max_new_tokens=512)
    code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return code
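# Quick local sanity check, exercising the same path the UI uses
# (the prompt below is a hypothetical example, not part of the app):
# print(generate_code("def fibonacci(n):"))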
# Set up the Gradio interface (the model is CodeLlama-7b-Python-hf, so the
# description names that checkpoint rather than the base CodeLlama-7b-hf)
demo = gr.Interface(fn=generate_code,
                    inputs="text",
                    outputs="text",
                    title="CodeLlama 7B Python Model",
                    description="Generate Python code with CodeLlama-7b-Python-hf.")
demo.launch()
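# Optional tweak, assuming default Gradio queue settings: generation on a 7B
# model can take a while, so enabling the request queue avoids HTTP timeouts:
# demo.queue().launch()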