import gradio as gr
from transformers import AutoTokenizer
import transformers
import torch
import os
import time

# Model configuration
MODEL_NAME = "meta-llama/CodeLlama-7b-hf"  # CodeLlama 7B base model

# Default example prompts
EXAMPLES = [
    ["import socket\n\ndef ping_exponential_backoff(host: str):"],
    ["def fibonacci(n: int) -> int:"],
    ["class BinarySearchTree:\n    def __init__(self):"],
    ["async def fetch_data(url: str):"]
]

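# Each example above is wrapped in its own list because gr.Examples expects one
# value per input component; this interface has a single prompt textbox.
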
# Load the model with error handling and authentication
def load_model():
    try:
        print("Loading model and tokenizer...")

        # Get the Hugging Face token from an environment variable.
        # On a Hugging Face Space this is set in the Space settings.
        hf_token = os.environ.get("HF_TOKEN")

        # If running locally and the token is not set, try to recover it from the git remote URL
        if not hf_token:
            try:
                # Read the remote URL from git config if available
                import subprocess
                git_url = subprocess.check_output(
                    ["git", "config", "--get", "remote.origin.url"]
                ).decode().strip()
                if "@huggingface.co" in git_url:
                    # Extract the token from a URL of the form https://username:token@huggingface.co/...
                    userinfo = git_url.split("://", 1)[-1].split("@", 1)[0]
                    hf_token = userinfo.split(":", 1)[1] if ":" in userinfo else None
                    if hf_token:
                        print("Using token from git config")
            except Exception as e:
                print(f"Could not extract token from git config: {str(e)}")

        # Load the tokenizer and pipeline, passing the token if one was found
        if hf_token:
            print("Using Hugging Face token for authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
                token=hf_token,
            )
        else:
            # Try without a token (works only if the model is public or the user is already logged in)
            print("No Hugging Face token found, trying without authentication")
            tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
            pipeline = transformers.pipeline(
                "text-generation",
                model=MODEL_NAME,
                torch_dtype=torch.float16,
                device_map="auto",
            )

        print("Model loaded successfully!")
        return tokenizer, pipeline
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        # Give a more helpful message for the common authentication failure
        if "gated repo" in str(e) or "401" in str(e):
            print("\nIMPORTANT: CodeLlama is a gated model that requires authentication.")
            print("To use this model, you need to:")
            print("1. Accept the model's license at https://huggingface.co/meta-llama/CodeLlama-7b-hf")
            print("2. Set your Hugging Face token in the Space's settings")
            print("   (Settings > Repository Secrets > Add > HF_TOKEN)")
        # Return None to signal failure
        return None, None

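# A sketch of an alternative token lookup (not used by load_model above):
# huggingface_hub, which transformers already depends on, can reuse a token
# cached by `huggingface-cli login`, avoiding the git-remote parsing entirely.
def _cached_hf_token():
    """Return HF_TOKEN from the environment, else a token cached by `huggingface-cli login`, else None."""
    try:
        from huggingface_hub import HfFolder
        return os.environ.get("HF_TOKEN") or HfFolder.get_token()
    except Exception:
        # huggingface_hub unavailable or no cached login
        return os.environ.get("HF_TOKEN")
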
# Generate code based on the prompt
def generate_code(prompt, max_length=200, temperature=0.1, top_p=0.95, top_k=10):
    try:
        # Check that the model loaded
        if tokenizer is None or pipeline is None:
            return "Error: Model failed to load. Please check the logs."

        # Time the generation
        start_time = time.time()

        # Generate the code (Gradio sliders may deliver floats, so cast the integer parameters)
        sequences = pipeline(
            prompt,
            do_sample=True,
            top_k=int(top_k),
            temperature=temperature,
            top_p=top_p,
            num_return_sequences=1,
            eos_token_id=tokenizer.eos_token_id,
            max_length=int(max_length),
        )

        # Calculate the generation time
        generation_time = time.time() - start_time

        # Append the timing to the generated text
        result = sequences[0]["generated_text"]
        return f"{result}\n\n---\nGeneration time: {generation_time:.2f} seconds"
    except Exception as e:
        return f"Error generating code: {str(e)}"

# Load the model and tokenizer at startup
print("Initializing CodeLlama-7b...")
tokenizer, pipeline = load_model()

# Create the Gradio interface
with gr.Blocks(title="CodeLlama Code Generation") as demo:
    gr.Markdown("# CodeLlama Code Generation")
    gr.Markdown("Enter a code prompt and CodeLlama will complete it for you.")

    # Show a note about authentication if the model failed to load
    if tokenizer is None or pipeline is None:
        gr.Markdown("""
        ## ⚠️ Authentication Required

        This demo requires authentication to access the CodeLlama model. To use this model, you need to:

        1. Accept the model's license at [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf)
        2. Set your Hugging Face token in the Space's settings (Settings > Repository Secrets > Add > HF_TOKEN)

        The demo will show a limited interface until authentication is set up.
        """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Code Prompt",
                placeholder="Enter your code prompt here...",
                lines=5
            )
            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=500,
                    value=200,
                    step=10,
                    label="Max Length"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.1,
                    step=0.1,
                    label="Temperature"
                )
            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                top_k = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Top-k"
                )
            generate_btn = gr.Button("Generate Code")
        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=20
            )

    # Connect the button to the generation function
    generate_btn.click(
        fn=generate_code,
        inputs=[prompt, max_length, temperature, top_p, top_k],
        outputs=output
    )

    # Add examples
    gr.Examples(
        examples=EXAMPLES,
        inputs=prompt
    )

    # Add information about the model
    gr.Markdown("""
    ## About

    This demo uses the CodeLlama-7b model to generate code completions based on your prompts.

    - **Max Length**: Controls the maximum length of the generated text
    - **Temperature**: Controls randomness (lower = more deterministic)
    - **Top-p**: Controls diversity via nucleus sampling
    - **Top-k**: Controls diversity via top-k sampling

    **Note**: CodeLlama is a gated model that requires authentication. If you're seeing authentication errors,
    please follow the instructions at the top of the page.

    Created by DheepLearning
    """)

# Launch the app
demo.launch()
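
# Notes on running this Space (assumptions; the actual requirements.txt is not
# shown here): the imports above imply at least gradio, torch, and transformers,
# and device_map="auto" additionally requires the accelerate package. To run the
# app locally, export a token that has accepted the CodeLlama license first:
#
#   export HF_TOKEN=hf_xxx
#   python app.py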