import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"  # Gated model; requires license acceptance and an HF token

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"]
]

# Load model with error handling and authentication
def load_model():
    try:
        print("Loading model and tokenizer...")

        # Get Hugging Face token from environment variable
        # This will be set in the Hugging Face Space settings
        hf_token = os.environ.get("HF_TOKEN")

        # If running locally and token is not set, try to use the token from git config
        if not hf_token:
            try:
                # Extract token from git config if available
                import subprocess
                git_url = subprocess.check_output(["git", "config", "--get", "remote.origin.url"]).decode().strip()
                if "@huggingface.co" in git_url:
                    # Extract token from URL if it's in the format https://username:token@huggingface.co/...
                    credentials = git_url.split("@")[0].split("//")[-1]
                    hf_token = credentials.split(":")[1] if ":" in credentials else None
                    if hf_token:
                        print("Using token from git config")
            except Exception as e:
                print(f"Could not extract token from git config: {str(e)}")

        # Load tokenizer with token if available
        if hf_token:
            print("Using Hugging Face token for authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)

            # Configure the pipeline with token
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token
            )
        else:
            # Try without token (will only work if model is public or user is logged in)
            print("No Hugging Face token found, trying without authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
            )

        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Try to provide more helpful error message
        if "gated repo" in str(e) or "401" in str(e):
            print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
            print("To use this model, you need to:")
            print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
            print("2. Set your Hugging Face token in the Space's settings")
            print("   (Settings > Repository Secrets > Add > HF_TOKEN)")

        # Return None to indicate failure
        return None, None

# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
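    # Sampling notes: temperature rescales token probabilities (lower = more deterministic),
    # top_p (nucleus sampling) keeps the smallest token set whose cumulative probability
    # exceeds top_p, and top_k restricts sampling to the k most likely tokens.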
    try:
        # Check if model is loaded
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Add a loading message
        start_time = time.time()

        # Generate the code
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=top_k,
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=max_length,
        )

        # Calculate generation time
        generation_time = time.time() - start_time

        # Format the result
        result = sequences[0]['generated_text']
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"

    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer
print("Initializing CodeLlama-7b...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    # Add a note about authentication if needed
    if tokenizer is None or pipeline is None:
        gr.Markdown("""
        ## ⚠️ Authentication Required

        This demo requires authentication to access the CodeLlama model.

        To use this model, you need to:
        1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
        2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)

        The demo will show a limited interface until authentication is set up.
        """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )

            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )

            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )

            generate_btn = gr.Button("Generate Code")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generate function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model
    gr.Markdown("""
    ## About

    This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

    - **Max Length**: Controls the maximum length of the generated text
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Controls diversity via nucleus sampling
    - **Top-k**: Controls diversity via top-k sampling

    **Note**: CodeLlama is a gated model that requires authentication. If you're seeing authentication errors,
    please follow the instructions at the top of the page.

    Created by DheepLearning
    """)

# Launch the app
demo.launch()
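
# Optional tweak (an assumption, not part of the original setup): for long generations on
# shared or CPU-only hardware, Gradio's request queue can help avoid timeouts, e.g.
#   demo.queue(max_size=8).launch()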