Commit: with some changes with access

Files changed:
- README.md +12 -1
- __pycache__/app.cpython-312.pyc +0 -0
- app.py +71 -13
README.md
CHANGED

@@ -17,7 +17,7 @@ This is an interactive demo of the CodeLlama-7b model for generating code comple
 
 ## Features
 
-- Interactive code generation with CodeLlama-7b
+- Interactive code generation with CodeLlama-7b model
 - Adjustable parameters (temperature, max length, etc.)
 - Example prompts to get started quickly
 - Real-time generation with timing information
@@ -42,6 +42,17 @@ The demo includes several example prompts to help you get started:
 - Binary search tree class
 - Asynchronous data fetching function
 
+## Authentication Requirements
+
+**Important**: CodeLlama is a gated model that requires authentication to access. To use this demo:
+
+1. You must accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
+2. You need to set your Hugging Face token in the Space's settings:
+   - Go to Settings > Repository Secrets > Add
+   - Create a secret named `HF_TOKEN` with your Hugging Face token as the value
+
+Without proper authentication, the demo will show a limited interface with instructions.
+
 ## Technical Details
 
 This demo uses:
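The new Authentication Requirements section relies on a Space secret named `HF_TOKEN`. As a quick way to confirm that the secret actually grants access to the gated repo before a multi-gigabyte download starts, something like the following could be run. This is a minimal sketch, not part of the commit; it assumes the `huggingface_hub` package (a dependency of `transformers`), and the helper name `check_gated_access` is illustrative.

```python
import os

from huggingface_hub import HfApi


def check_gated_access(repo_id: str = "meta-llama/CodeLlama-7b-hf") -> bool:
    """Return True if the HF_TOKEN secret can read the gated model's metadata."""
    token = os.environ.get("HF_TOKEN")  # set via Settings > Repository Secrets > Add
    if not token:
        print("HF_TOKEN is not set; the gated model cannot be downloaded.")
        return False
    try:
        # model_info raises an error if the token is invalid or the model's
        # license has not been accepted for this account.
        HfApi().model_info(repo_id, token=token)
        return True
    except Exception as e:
        print(f"Token found, but access check failed: {e}")
        return False
```

Checking metadata first keeps the failure mode cheap: the Space can fall back to the limited interface described in the README without ever attempting to load the model weights.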
__pycache__/app.cpython-312.pyc
ADDED

Binary file (7.54 kB).
app.py
CHANGED

@@ -6,7 +6,7 @@ import os
 import time
 
 # Model configuration
-MODEL_NAME = "meta-llama/CodeLlama-7b-hf"
+MODEL_NAME = "meta-llama/CodeLlama-7b-hf"  # Using CodeLlama as requested
 
 # Default example prompts
 EXAMPLES = [
@@ -16,25 +16,66 @@ EXAMPLES = [
     ["async def fetch_data(url: str):"]
 ]
 
-# Load model with error handling
+# Load model with error handling and authentication
 def load_model():
     try:
         print("Loading model and tokenizer...")
-
-
-        #
-
-
-
-
-
-
-
+
+        # Get Hugging Face token from environment variable
+        # This will be set in the Hugging Face Space settings
+        hf_token = os.environ.get("HF_TOKEN")
+
+        # If running locally and token is not set, try to use the token from git config
+        if not hf_token:
+            try:
+                # Extract token from git config if available
+                import subprocess
+                git_url = subprocess.check_output(["git", "config", "--get", "remote.origin.url"]).decode().strip()
+                if "@huggingface.co" in git_url:
+                    # Extract token from URL if it's in the format https://username:token@huggingface.co/...
+                    hf_token = git_url.split(":")[-2].split("/")[-1] if ":" in git_url else None
+                    if hf_token:
+                        print("Using token from git config")
+            except Exception as e:
+                print(f"Could not extract token from git config: {str(e)}")
+
+        # Load tokenizer with token if available
+        if hf_token:
+            print("Using Hugging Face token for authentication")
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
+
+            # Configure the pipeline with token
+            pipeline = transformers.pipeline(
+                "text-generation",
+                model=MODEL_NAME,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                token=hf_token
+            )
+        else:
+            # Try without token (will only work if model is public or user is logged in)
+            print("No Hugging Face token found, trying without authentication")
+            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+            pipeline = transformers.pipeline(
+                "text-generation",
+                model=MODEL_NAME,
+                torch_dtype=torch.float16,
+                device_map="auto",
+            )
 
         print("Model loaded successfully!")
         return tokenizer, pipeline
     except Exception as e:
         print(f"Error loading model: {str(e)}")
+        # Try to provide more helpful error message
+        if "gated repo" in str(e) or "401" in str(e):
+            print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
+            print("To use this model, you need to:")
+            print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
+            print("2. Set your Hugging Face token in the Space's settings")
+            print("   (Settings > Repository Secrets > Add > HF_TOKEN)")
+
         # Return None to indicate failure
         return None, None
 
@@ -71,7 +112,7 @@ def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10)
         return f"Error generating code: {str(e)}"
 
 # Load the model and tokenizer
-print("Initializing CodeLlama...")
+print("Initializing CodeLlama-7b...")
 tokenizer, pipeline = load_model()
 
 # Create the Gradio interface
@@ -79,6 +120,20 @@ with gr.Blocks(title="CodeLlama Code Generation") as demo:
     gr.Markdown("# CodeLlama Code Generation")
     gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")
 
+    # Add a note about authentication if needed
+    if tokenizer is None or pipeline is None:
+        gr.Markdown("""
+        ## ⚠️ Authentication Required
+
+        This demo requires authentication to access the CodeLlama model.
+
+        To use this model, you need to:
+        1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
+        2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)
+
+        The demo will show a limited interface until authentication is set up.
+        """)
+
     with gr.Row():
         with gr.Column():
             prompt = gr.Textbox(
@@ -151,6 +206,9 @@ with gr.Blocks(title="CodeLlama Code Generation") as demo:
     - **Top-p**: Controls diversity via nucleus sampling
     - **Top-k**: Controls diversity via top-k sampling
 
+    **Note**: CodeLlama is a gated model that requires authentication. If you're seeing authentication errors,
+    please follow the instructions at the top of the page.
+
     Created by DheepLearning
     """)
 
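One detail in the new git-config fallback may be worth a second look: for a remote of the form `https://username:token@huggingface.co/...`, the expression `git_url.split(":")[-2].split("/")[-1]` evaluates to the username segment rather than the token. A sketch of a more robust parse, assuming the remote URL embeds credentials in the usual `user:password` position, is to let the standard library handle it; the helper name `token_from_remote_url` is illustrative, not from the commit.

```python
from urllib.parse import urlparse


def token_from_remote_url(git_url: str):
    """Return the password component of an https://user:token@host/... URL, or None."""
    parsed = urlparse(git_url.strip())
    # urlparse splits the credentials out of the netloc; for
    # "https://someuser:hf_example123@huggingface.co/spaces/demo"
    # the password attribute is "hf_example123".
    return parsed.password


# Example with a made-up remote URL and token value:
print(token_from_remote_url("https://someuser:hf_example123@huggingface.co/spaces/demo"))
# -> hf_example123
```

Either way, the environment-variable path (`HF_TOKEN` set as a Space secret) remains the route the README documents; the git-config fallback only matters when running the app locally from a clone whose remote URL still embeds a token.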