import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"  # gated model; see load_model() for authentication handling
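
# Note (assumption, not part of the original Space): for a quick local smoke
# test without accepting the CodeLlama license, an open checkpoint such as
# "bigcode/santacoder" could be substituted here; prompts and outputs will differ.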

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"],
]

# Load model with error handling and authentication
def load_model():
    try:
        print("Loading model and tokenizer...")

        # Get the Hugging Face token from an environment variable
        # (set in the Space's settings under Repository Secrets)
        hf_token = os.environ.get("HF_TOKEN")

        # If running locally and the token is not set, try to recover it from
        # the git remote URL as a best-effort fallback
        if not hf_token:
            try:
                import subprocess
                git_url = subprocess.check_output(
                    ["git", "config", "--get", "remote.origin.url"]
                ).decode().strip()
                if "@huggingface.co" in git_url:
                    # URLs of the form https://username:token@huggingface.co/...
                    # carry the token between the last ':' and the '@'
                    hf_token = git_url.split(":")[-1].split("@")[0] if ":" in git_url else None
                    if hf_token:
                        print("Using token from git config")
            except Exception as e:
                print(f"Could not extract token from git config: {str(e)}")

        # Load the tokenizer and pipeline, passing the token if available
        if hf_token:
            print("Using Hugging Face token for authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token,
            )
        else:
            # Try without a token (works only if the model is public or the
            # user is already logged in)
            print("No Hugging Face token found, trying without authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
            )

        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Give a more helpful message for the common authentication failure
        if "gated repo" in str(e) or "401" in str(e):
            print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
            print("To use this model, you need to:")
            print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
            print("2. Set your Hugging Face token in the Space's settings")
            print("   (Settings > Repository Secrets > Add > HF_TOKEN)")
        # Return None to indicate failure
        return None, None
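
# To run locally instead, the simplest path is to export a read-scoped token
# (from https://huggingface.co/settings/tokens) before launching:
#   export HF_TOKEN=hf_...
#   python app.py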

# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check that the model loaded (tokenizer and pipeline are module-level globals)
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Time the generation
        start_time = time.time()

        # Generate the code
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=top_k,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=max_length,
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]["generated_text"]
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"
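
# Note: the text-generation pipeline returns the prompt plus the completion in
# "generated_text"; passing return_full_text=False in the call above would
# return only the newly generated code.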

# Load the model and tokenizer
print("Initializing CodeLlama-7b...")
tokenizer, pipeline = load_model()
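
# Sizing note (rough estimate): a 7B model in float16 is about 14 GB of
# weights, so this Space realistically needs a GPU runtime; with
# device_map="auto", the weights are offloaded to CPU/disk otherwise, which is
# very slow.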

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    # Add a note about authentication if needed
    if tokenizer is None or pipeline is None:
        gr.Markdown("""
        ## ⚠️ Authentication Required

        This demo requires authentication to access the CodeLlama model.
        To use this model, you need to:

        1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
        2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)

        The demo will show a limited interface until authentication is set up.
        """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5,
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature",
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k",
                )
            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20,
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output,
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt,
    )

    # Add information about the model
    gr.Markdown("""
    ## About

    This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

    - **Max Length**: Maximum total length of the output in tokens, including the prompt
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Controls diversity via nucleus sampling
    - **Top-k**: Controls diversity via top-k sampling

    **Note**: CodeLlama is a gated model that requires authentication. If you're seeing
    authentication errors, please follow the instructions at the top of the page.

    Created by DheepLearning
    """)

# Launch the app
demo.launch()
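
# On Hugging Face Spaces the app is served automatically once launched; when
# running this file elsewhere, demo.launch(share=True) would also expose a
# temporary public URL.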