import os

import numpy as np
import torch
from PIL import Image
from diffusers import AutoPipelineForImage2Image
import gradio as gr
import google.generativeai as genai
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Access the API key from the environment and fail fast if it is missing
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("Missing GOOGLE_API_KEY environment variable. Please set it in your .env file.")

# Configure the genai library
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize the Gemini models: model2 describes the input image, model1 rewrites the prompt
model1 = genai.GenerativeModel('gemini-1.0-pro-latest')
model2 = genai.GenerativeModel('gemini-1.5-flash-latest')

# Load the fine-tuned SDXL checkpoint as an image-to-image pipeline
# (the generation call below passes `image=` and `strength=`, which the
# plain text-to-image pipeline does not accept)
model_path = "GiantAnalytics/sdxl_fine_tuned_model_aditya_2"
pipe = AutoPipelineForImage2Image.from_pretrained(model_path, torch_dtype=torch.float16)

# Set the device based on CUDA availability
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe.to(device)


def enhance_prompt_and_generate_images(image, prompt):
    # Gradio delivers the uploaded image as a NumPy array; convert it to a PIL image
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype('uint8'), 'RGB')

    try:
        # Step 1: Get an enhanced prompt using the Gemini API.
        # First ask Gemini to describe the texture and design of the input textile image.
        description_prompt = (
            "Describe the texture and the design of the input textile image in detail. "
            "Structure the answer as: Texture Details:, Design Details:, "
            "followed by an overall description of the image."
        )
        response1 = model2.generate_content([description_prompt, image], stream=False)
        response1.resolve()
        initial_description = response1.text

        if initial_description:
            # Then ask Gemini to merge the user's prompt with the image description
            enhanced_prompt = f'''First, identify the user's specifications provided in the prompt: {prompt}.
Understand the image details: {initial_description}.
Now, generate a detailed prompt that combines the user inputs with the image details in a suitable way.
This new prompt will help generate a new image with the SDXL model.
The prompt should be concise and less than 100 tokens; curate it carefully.
Focus on maintaining the theme and the overall feel of the design, incorporating subtle changes that enhance its uniqueness and visual appeal.'''
            response2 = model1.generate_content([enhanced_prompt], stream=False)
            response2.resolve()
            final_prompt = response2.text if response2.text else prompt
        else:
            final_prompt = prompt  # Use the original prompt if no description is available

        print(final_prompt)
    except Exception as e:
        print(f"Failed to enhance prompt via Gemini API: {e}")
        final_prompt = prompt  # Use the original prompt on any error

    # Step 2: Generate three image variations with different guidance_scale/strength settings
    image_variations = []
    settings = [(7.5, 0.5), (8.0, 0.6), (6.0, 0.4)]
    for i, (guidance, strength) in enumerate(settings):
        generator = torch.Generator(device=device).manual_seed(i * 100)
        output = pipe(
            prompt=final_prompt,
            image=image,
            guidance_scale=guidance,
            strength=strength,
            generator=generator,
        ).images[0]
        image_variations.append(output)

    return image_variations


# Path to your local logo image
logo_path = '/content/RCD-Final Logosmall size.jpg'  # Replace with your image path

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=10):
            gr.Markdown(
                """

# Text Guided Image-to-Image Generation

Enter a text prompt with the required parameters to transform the input image using the fine-tuned SDXL model.

                """,
                elem_id="logo-container",
            )
        with gr.Column(scale=1, elem_id="logo-column"):
            logo = gr.Image(value=logo_path, elem_id="logo", height=128, width=128)

    with gr.Row():
        img_input = gr.Image(label="Upload Image")
        prompt_input = gr.Textbox(label="Enter your prompt")

    submit_btn = gr.Button("Generate")

    with gr.Row():
        output_image1 = gr.Image(label="Variation 1")
        output_image2 = gr.Image(label="Variation 2")
        output_image3 = gr.Image(label="Variation 3")

    submit_btn.click(
        enhance_prompt_and_generate_images,
        inputs=[img_input, prompt_input],
        outputs=[output_image1, output_image2, output_image3],
    )

if __name__ == "__main__":
    demo.launch(debug=True)  # inline=False