import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"  # CodeLlama 7B base model

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"]
]

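# Each example above is wrapped in its own list because gr.Examples expects one
# value per input component; this interface has a single prompt textbox.
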
# Load the model with error handling and authentication
def load_model():
    try:
        print("Loading model and tokenizer...")

        # Get the Hugging Face token from an environment variable.
        # On a Hugging Face Space this is set in the Space settings.
        hf_token = os.environ.get("HF_TOKEN")

        # If running locally and the token is not set, try to recover it from the git remote URL
        if not hf_token:
            try:
                # Read the remote URL from git config if available
                import subprocess
                git_url = subprocess.check_output(
                    ["git", "config", "--get", "remote.origin.url"]
                ).decode().strip()
                if "@huggingface.co" in git_url:
                    # Extract the token from a URL of the form https://username:token@huggingface.co/...
                    userinfo = git_url.split("://", 1)[-1].split("@", 1)[0]
                    hf_token = userinfo.split(":", 1)[1] if ":" in userinfo else None
                    if hf_token:
                        print("Using token from git config")
            except Exception as e:
                print(f"Could not extract token from git config: {str(e)}")

        # Load the tokenizer and pipeline, passing the token if one was found
        if hf_token:
            print("Using Hugging Face token for authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token,
            )
        else:
            # Try without a token (works only if the model is public or the user is already logged in)
            print("No Hugging Face token found, trying without authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
            )

        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Give a more helpful message for the common authentication failure
        if "gated repo" in str(e) or "401" in str(e):
            print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
            print("To use this model, you need to:")
            print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
            print("2. Set your Hugging Face token in the Space's settings")
            print("   (Settings > Repository Secrets > Add > HF_TOKEN)")
        # Return None to signal failure
        return None, None

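# A sketch of an alternative token lookup (not used by load_model above):
# huggingface_hub, which transformers already depends on, can reuse a token
# cached by `huggingface-cli login`, avoiding the git-remote parsing entirely.
def _cached_hf_token():
    """Return HF_TOKEN from the environment, else a token cached by `huggingface-cli login`, else None."""
    try:
        from huggingface_hub import HfFolder
        return os.environ.get("HF_TOKEN") or HfFolder.get_token()
    except Exception:
        # huggingface_hub unavailable or no cached login
        return os.environ.get("HF_TOKEN")
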
# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check that the model loaded
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Time the generation
        start_time = time.time()

        # Generate the code (Gradio sliders may deliver floats, so cast the integer parameters)
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=int(top_k),
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=int(max_length),
        )

        # Calculate the generation time
        generation_time = time.time() - start_time

        # Append the timing to the generated text
        result = sequences[0]["generated_text"]
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer at startup
print("Initializing CodeLlama-7b...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    # Show a note about authentication if the model failed to load
    if tokenizer is None or pipeline is None:
        gr.Markdown("""
        ## ⚠️ Authentication Required

        This demo requires authentication to access the CodeLlama model. To use this model, you need to:

        1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
        2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)

        The demo will show a limited interface until authentication is set up.
        """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")
        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model
    gr.Markdown("""
    ## About

    This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

    - **Max Length**: Controls the maximum length of the generated text
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Controls diversity via nucleus sampling
    - **Top-k**: Controls diversity via top-k sampling

    **Note**: CodeLlama is a gated model that requires authentication. If you're seeing authentication errors,
    please follow the instructions at the top of the page.

    Created by DheepLearning
    """)

# Launch the app
demo.launch()
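
# Notes on running this Space (assumptions; the actual requirements.txt is not
# shown here): the imports above imply at least gradio, torch, and transformers,
# and device_map="auto" additionally requires the accelerate package. To run the
# app locally, export a token that has accepted the CodeLlama license first:
#
#   export HF_TOKEN=hf_xxx
#   python app.py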