"""Gradio app: caption an uploaded image (BLIP), translate the caption to
Arabic (NLLB-200), and generate new images from the English caption with
Stable Diffusion v1.5."""
import gradio as gr
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import torch
import wget

# Select the compute device: prefer GPU ("cuda") when available, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the models (all downloaded from the Hugging Face Hub on first run).
# Image captioning model: produces an English text caption from an image.
caption_image = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", device=device)
# Stable Diffusion v1.5 pipeline: generates new images from a text prompt.
sd_pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)

# Translation model (NLLB-200); used here for English -> Arabic.
# bfloat16 halves memory for the 600M-parameter checkpoint.
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
    device=device
)

# Caption the uploaded image, translate the caption, and generate new images.
# NOTE: the original file defined this function twice, byte-identical; the
# second definition silently shadowed the first. Deduplicated here.
def generate_image_and_translate(image, num_images=1):
    """Generate images from an uploaded image's caption and translate it.

    Args:
        image: PIL image from the Gradio upload widget.
        num_images: How many Stable Diffusion images to generate (default 1).
            Coerced to int because Gradio sliders may deliver floats.

    Returns:
        Tuple of (list of generated PIL images,
                  English caption string,
                  Arabic caption string).
    """
    # Generate an English caption from the uploaded image.
    caption_en = caption_image(image)[0]['generated_text']

    # Translate the English caption to Arabic (NLLB language codes).
    caption_ar = translator(caption_en, src_lang="eng_Latn", tgt_lang="arb_Arab")[0]['translation_text']

    # Generate the requested number of images from the English caption.
    # int() guards against a float slider value, which would break range().
    generated_images = [
        sd_pipeline(prompt=caption_en).images[0]
        for _ in range(int(num_images))
    ]

    # Return the generated images along with both captions.
    return generated_images, caption_en, caption_ar

# Set up the Gradio interface: wires the upload widget and slider to the
# processing function, and displays the gallery plus both captions.
interface = gr.Interface(
    fn=generate_image_and_translate,   # Function to call when processing input
    inputs=[
        gr.Image(type="pil", label="πŸ“€ Upload Image"), # Image upload (delivered as a PIL image)
        gr.Slider(minimum=1, maximum=10, label="πŸ”’ Number of Images", value=1, step=1) # How many images to generate (1-10)
    ],
    outputs=[
        gr.Gallery(label="πŸ–ΌοΈ Generated Images"),                               # Generated image(s)
        gr.Textbox(label="πŸ“ Generated Caption (English)", interactive=False),  # Read-only English caption
        gr.Textbox(label="🌍 Translated Caption (Arabic)", interactive=False)   # Read-only Arabic translation
    ],
    title="Image Generation and Captioning", # Title of the interface
    description="Upload an image to extract a caption and display it in both Arabic and English. Then, a new image will be generated based on that caption.",  # Description
    theme='freddyaboulton/dracula_revamped' # Hub-hosted custom theme
)

# Launch the Gradio application (blocks and serves the web UI).
interface.launch()