Create app.py

18d06ea verified 3 months ago

4.59 kB

	import os

	import google.generativeai as genai
	import gradio as gr
	import numpy as np
	import torch
	from diffusers import DiffusionPipeline
	from dotenv import load_dotenv
	from PIL import Image

	# Read key/value pairs from a local .env file into the process environment.
	load_dotenv()

	# The Gemini API key must come from the environment; startup is aborted
	# when it is absent or empty, since genai is configured with it below.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
	if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
	    raise ValueError("Missing GOOGLE_API_KEY environment variable. Please set it in your .env file.")

	# Hand the key to the google.generativeai client library.
	genai.configure(api_key=GOOGLE_API_KEY)

	# Two Gemini model handles: model2 receives the image plus a text request,
	# model1 receives a text-only request (see enhance_prompt_and_generate_images).
	model1 = genai.GenerativeModel('gemini-1.0-pro-latest')
	model2 = genai.GenerativeModel('gemini-1.5-flash-latest')

	# Identifier of the fine-tuned SDXL checkpoint handed to from_pretrained.
	model_path = "GiantAnalytics/sdxl_fine_tuned_model_aditya_2"

	# Decide the device first so the weight precision can be matched to it.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Half precision is only requested on CUDA. diffusers warns that pipelines
	# loaded in float16 cannot run on a CPU device, so fall back to float32 there.
	pipe_dtype = torch.float16 if device == "cuda" else torch.float32
	pipe = DiffusionPipeline.from_pretrained(model_path, torch_dtype=pipe_dtype)
	pipe.to(device)

	# NOTE(review): the generation code calls pipe(..., image=..., strength=...),
	# which presumes the class resolved by DiffusionPipeline.from_pretrained is an
	# image-to-image pipeline -- confirm against the checkpoint's model_index.json.

	def _enhance_prompt_with_gemini(image, prompt):
	    """Return a Gemini-refined version of ``prompt``, or ``prompt`` itself on failure.

	    model2 is first asked to describe the texture and design of ``image``;
	    model1 is then asked to merge that description with the user's prompt
	    into a short prompt for the SDXL model. Any exception raised along the
	    way is logged with ``print`` and the original ``prompt`` is returned, so
	    image generation can still proceed without the enhancement.
	    """
	    description_request = '''provide me all the information about texture of the design how it is looking and design of the input textile image in descriptive format
	It should provide like this Texture Details: , Design Details: and overall description of image'''
	    try:
	        # Step 1a: describe the uploaded image.
	        response1 = model2.generate_content([description_request, image], stream=False)
	        response1.resolve()
	        initial_description = response1.text
	        if not initial_description:
	            # No description came back: keep the user's prompt unchanged.
	            return prompt

	        # Step 1b: combine the user's prompt with the image description.
	        # The user's text is the ``prompt`` argument; the previous code
	        # referenced an undefined name here, which always raised NameError
	        # and silently disabled the enhancement.
	        enhanced_prompt = f'''First, identify the user's specifications provided in the prompt: {prompt}.
	Understand the image details: {initial_description}. Now, generate a detailed prompt that combines the user inputs with the image details in a suitable way.
	This new prompt will help generate a new image with the SDXL model. The prompt should be concise and less than 100 tokens; curate it carefully.
	Focus on maintaining the theme and the overall feel of the design, incorporating subtle changes that enhance its uniqueness and visual appeal.'''
	        response2 = model1.generate_content([enhanced_prompt], stream=False)
	        response2.resolve()
	        return response2.text or prompt
	    except Exception as e:
	        # Deliberate best-effort boundary: a Gemini failure must not block generation.
	        print(f"Failed to enhance prompt via Gemini API: {e}")
	        return prompt


	def enhance_prompt_and_generate_images(image, prompt):
	    """Generate three variations of ``image`` guided by ``prompt``.

	    Args:
	        image: The input image. A numpy array is converted to an RGB PIL
	            image; any other value is passed on unchanged (presumably a PIL
	            image supplied by the Gradio image component -- verify against
	            the component's configured type).
	        prompt: The user's text instructions.

	    Returns:
	        A list with one generated image per (guidance_scale, strength)
	        setting, i.e. three images, each being the first image of the
	        corresponding pipeline output.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if image is None:
	        # Fail early with a readable message instead of an opaque pipeline error.
	        raise gr.Error("Please upload an image before generating.")

	    if isinstance(image, np.ndarray):
	        # Let Pillow infer the mode from the array shape, then normalise to
	        # RGB so grayscale or RGBA inputs are handled as well.
	        image = Image.fromarray(image.astype('uint8')).convert('RGB')

	    # Step 1: Get an enhanced prompt using the Gemini API (falls back to ``prompt``).
	    final_prompt = _enhance_prompt_with_gemini(image, prompt)
	    print(final_prompt)

	    # Step 2: Generate three image variations
	    settings = [(7.5, 0.5), (8.0, 0.6), (6.0, 0.4)]  # (guidance_scale, strength) pairs
	    image_variations = []
	    for i, (guidance, strength) in enumerate(settings):
	        # Fixed seeds (0, 100, 200) give each variation its own seeded generator.
	        generator = torch.Generator(device=device).manual_seed(i * 100)
	        output = pipe(
	            prompt=final_prompt,
	            image=image,
	            guidance_scale=guidance,
	            strength=strength,
	            generator=generator,
	        ).images[0]
	        image_variations.append(output)

	    return image_variations

	# Path to your local logo image
	logo_path = '/content/RCD-Final Logosmall size.jpg' # Replace with your image path

	# Gradio UI: a header row (title + optional logo), an input row (image,
	# prompt, button) and an output row with the three generated variations.
	with gr.Blocks() as demo:
	    with gr.Row():
	        with gr.Column(scale=10):
	            gr.Markdown(
	                """
	<div id="logo-container">
	<h1>Text Guided Image-to-Image Generation</h1>
	<p>Enter a text prompt with required parameters to transform the Input Image using the Fine-Tuned SDXL Model.</p>
	</div>
	""",
	                elem_id="logo-container"
	            )
	        with gr.Column(scale=1, elem_id="logo-column"):
	            # The logo path is machine-specific; only build the image component
	            # when the file is actually present so a missing logo does not
	            # prevent the app from starting.
	            if os.path.exists(logo_path):
	                logo = gr.Image(value=logo_path, elem_id="logo", height=128, width=128)
	            else:
	                logo = None

	    with gr.Row():
	        img_input = gr.Image(label="Upload Image")
	        prompt_input = gr.Textbox(label="Enter your prompt")
	        submit_btn = gr.Button("Generate")

	    with gr.Row():
	        output_image1 = gr.Image(label="Variation 1")
	        output_image2 = gr.Image(label="Variation 2")
	        output_image3 = gr.Image(label="Variation 3")

	    # The handler returns a list of three images, mapped in order onto the
	    # three output components.
	    submit_btn.click(
	        enhance_prompt_and_generate_images,
	        inputs=[img_input, prompt_input],
	        outputs=[output_image1, output_image2, output_image3]
	    )

	# Start the Gradio app, with debug=True, only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True)