"""Gradio app: caption an uploaded image (BLIP), translate the caption to
Arabic (NLLB-200), and generate new images from the English caption with
Stable Diffusion v1.5."""
import gradio as gr
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import torch
import wget

# Select the compute device: prefer GPU ("cuda") when available, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the models (all downloaded from the Hugging Face Hub on first run).
# Image captioning model: produces an English text caption from an image.
caption_image = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large", device=device)
# Stable Diffusion v1.5 pipeline: generates new images from a text prompt.
sd_pipeline = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)

# Translation model (NLLB-200); used here for English -> Arabic.
# bfloat16 halves memory for the 600M-parameter checkpoint.
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
    torch_dtype=torch.bfloat16,
    device=device
)

# Caption the uploaded image, translate the caption, and generate new images.
# NOTE: the original file defined this function twice, byte-identical; the
# second definition silently shadowed the first. Deduplicated here.
def generate_image_and_translate(image, num_images=1):
    """Generate images from an uploaded image's caption and translate it.

    Args:
        image: PIL image from the Gradio upload widget.
        num_images: How many Stable Diffusion images to generate (default 1).
            Coerced to int because Gradio sliders may deliver floats.

    Returns:
        Tuple of (list of generated PIL images,
                  English caption string,
                  Arabic caption string).
    """
    # Generate an English caption from the uploaded image.
    caption_en = caption_image(image)[0]['generated_text']

    # Translate the English caption to Arabic (NLLB language codes).
    caption_ar = translator(caption_en, src_lang="eng_Latn", tgt_lang="arb_Arab")[0]['translation_text']

    # Generate the requested number of images from the English caption.
    # int() guards against a float slider value, which would break range().
    generated_images = [
        sd_pipeline(prompt=caption_en).images[0]
        for _ in range(int(num_images))
    ]

    # Return the generated images along with both captions.
    return generated_images, caption_en, caption_ar

# Set up the Gradio interface: wires the upload widget and slider to the
# processing function, and displays the gallery plus both captions.
interface = gr.Interface(
    fn=generate_image_and_translate,   # Function to call when processing input
    inputs=[
        gr.Image(type="pil", label="πŸ“€ Upload Image"), # Image upload (delivered as a PIL image)
        gr.Slider(minimum=1, maximum=10, label="πŸ”’ Number of Images", value=1, step=1) # How many images to generate (1-10)
    ],
    outputs=[
        gr.Gallery(label="πŸ–ΌοΈ Generated Images"),                               # Generated image(s)
        gr.Textbox(label="πŸ“ Generated Caption (English)", interactive=False),  # Read-only English caption
        gr.Textbox(label="🌍 Translated Caption (Arabic)", interactive=False)   # Read-only Arabic translation
    ],
    title="Image Generation and Captioning", # Title of the interface
    description="Upload an image to extract a caption and display it in both Arabic and English. Then, a new image will be generated based on that caption.",  # Description
    theme='freddyaboulton/dracula_revamped' # Hub-hosted custom theme
)

# Launch the Gradio application (blocks and serves the web UI).
interface.launch()