import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import logging
from huggingface_hub import snapshot_download
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def download_lora_weights():
"""Download LoRA weights from Hugging Face"""
return snapshot_download(
repo_id="EmTpro01/Llama-3.2-3B-peft",
        # Allow both the legacy .bin and the newer .safetensors adapter weight formats
        allow_patterns=["adapter_config.json", "adapter_model.bin", "adapter_model.safetensors"],
)
def load_model_with_lora():
"""
Load Llama model and merge it with LoRA adapter
"""
try:
        # Configure 4-bit quantization (bitsandbytes generally requires a CUDA-capable GPU)
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_compute_dtype=torch.float16
)
# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
"unsloth/llama-3.2-3b-bnb-4bit",
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True
)
logger.info("Successfully loaded base model")
# Download and load LoRA adapter
lora_path = download_lora_weights()
logger.info(f"Downloaded LoRA weights to: {lora_path}")
# Load and merge LoRA adapter
model = PeftModel.from_pretrained(base_model, lora_path)
logger.info("Successfully loaded LoRA adapter")
# For inference, we can merge the LoRA weights with the base model
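        # (Assumption: merging into a 4-bit bitsandbytes base relies on a recent
        # peft release that dequantizes the affected layers before merging;
        # older versions may refuse to merge quantized weights.)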
model = model.merge_and_unload()
logger.info("Successfully merged LoRA weights with base model")
return model
except Exception as e:
logger.error(f"Error loading model: {str(e)}")
raise RuntimeError(f"Failed to load model: {str(e)}")
def load_tokenizer():
"""
Load tokenizer for the Llama model
"""
try:
tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3.2-3b-bnb-4bit")
logger.info("Successfully loaded tokenizer")
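        # Minimal safeguard (an addition, not part of the original checkpoint config):
        # Llama-style tokenizers often ship without a pad token, so fall back to
        # the EOS token to keep padded generation from erroring out.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token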
return tokenizer
except Exception as e:
logger.error(f"Error loading tokenizer: {str(e)}")
raise RuntimeError(f"Failed to load tokenizer: {str(e)}")
def generate_code(prompt, model, tokenizer, max_length=512, temperature=0.7):
"""
Generate code based on the prompt
"""
try:
# Add any specific prompt template if needed
formatted_prompt = f"### Instruction: Write code for the following task:\n{prompt}\n\n### Response:"
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
**inputs,
            max_new_tokens=max_length,  # budget applies to newly generated tokens, not the prompt
temperature=temperature,
do_sample=True,
top_p=0.95,
top_k=50,
repetition_penalty=1.1,
pad_token_id=tokenizer.eos_token_id
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Extract only the response part
response = generated_text.split("### Response:")[-1].strip()
return response
except Exception as e:
logger.error(f"Error during code generation: {str(e)}")
return f"Error generating code: {str(e)}"
# Initialize model and tokenizer
logger.info("Starting model initialization...")
model = load_model_with_lora()
tokenizer = load_tokenizer()
logger.info("Model initialization completed successfully")
# Create Gradio interface with error handling
def gradio_generate(prompt, temperature, max_length):
try:
return generate_code(prompt, model, tokenizer, max_length, temperature)
except Exception as e:
return f"Error: {str(e)}"
# Create the Gradio interface
demo = gr.Interface(
fn=gradio_generate,
inputs=[
gr.Textbox(
lines=5,
placeholder="Enter your code generation prompt here...",
label="Prompt"
),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.7,
step=0.1,
label="Temperature"
),
gr.Slider(
minimum=64,
maximum=2048,
value=512,
step=64,
label="Max Length"
)
],
outputs=gr.Code(label="Generated Code"),
title="Llama Code Generation with LoRA",
description="Enter a prompt to generate code using Llama 3.2 3B model fine-tuned with LoRA",
examples=[
["Write a Python function to sort a list of numbers in ascending order"],
["Create a simple REST API using FastAPI that handles GET and POST requests"],
["Write a function to check if a string is a palindrome"]
]
)
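# Optional sketch (not part of the original app): enabling Gradio's request queue
# can help on hosted Spaces when generation runs longer than the HTTP timeout.
# demo.queue()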
if __name__ == "__main__":
demo.launch()