|
|
|
# Gradio demo: multilingual (English/Arabic) summarization plus CLIP-based
# image retrieval over a local "images/" folder.
import os

import gradio as gr
import torch
from PIL import Image
from transformers import pipeline, AutoModel, AutoProcessor

# Multilingual abstractive summarizer (covers both English and Arabic input)
summarizer = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
# Arabic-to-English translator, so CLIP always receives an English query
translator_ar2en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en")
# CLIP model and processor for text and image embeddings
clip_model = AutoModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
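# Everything runs on CPU by default; GPU use would mean passing device=0 to the
# pipelines and moving clip_model (and its inputs) to the same device.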
|
|
|
|
|
def precompute_embeddings(image_dir="images"):
    """Embed every image in `image_dir` once, so each query only needs a text embedding."""
    image_paths = [os.path.join(image_dir, f) for f in os.listdir(image_dir)
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

    embeddings = []
    for path in image_paths:
        image = Image.open(path)
        inputs = clip_processor(images=image, return_tensors="pt")
        with torch.no_grad():
            embeddings.append(clip_model.get_image_features(**inputs))
    # Each feature tensor is (1, 512); stack them into one (num_images, 512) tensor
    return image_paths, torch.cat(embeddings)
|
|
|
image_paths, image_embeddings = precompute_embeddings() |
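# image_embeddings is a (num_images, 512) tensor for clip-vit-base-patch32,
# computed once at startup and reused for every query.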
|
|
|
def process(input_text, language):
    # Step 1: summarize the input in its original language
    summary = summarizer(input_text, max_length=150, min_length=30)[0]['summary_text']

    # Step 2: CLIP is trained on English text, so translate Arabic summaries before retrieval
    if language == "Arabic":
        translated = translator_ar2en(summary)[0]['translation_text']
        query_text = translated
    else:
        translated = ""
        query_text = summary

    # Step 3: embed the query text (truncation keeps it within CLIP's 77-token limit)
    text_inputs = clip_processor(
        text=query_text,
        return_tensors="pt",
        padding=True,
        truncation=True
    )
    with torch.no_grad():
        text_emb = clip_model.get_text_features(**text_inputs)

    # Step 4: rank images by similarity; text_emb is (1, D), image_embeddings is (N, D)
    similarities = (text_emb @ image_embeddings.T).softmax(dim=-1)
    top_k = min(3, len(image_paths))
    top_indices = similarities[0].topk(top_k).indices.tolist()
    results = [image_paths[i] for i in top_indices]

    # Outputs map, in order, to the summary box, the English-query box, and the gallery
    return summary, translated, results
|
|
|
|
|
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Multi-Task AI: Summarization & Image Retrieval")

    with gr.Row():
        lang = gr.Dropdown(["English", "Arabic"], value="English", label="Input Language")
        text_input = gr.Textbox(label="Input Text", lines=5)

    with gr.Row():
        summary_out = gr.Textbox(label="Summary")
        trans_out = gr.Textbox(label="English Query Text", visible=False)

    gallery = gr.Gallery(label="Retrieved Images", columns=3)
    submit = gr.Button("Process", variant="primary")

    # Show the English-query box only when the input language is Arabic
    def toggle_translation(lang):
        return gr.update(visible=lang == "Arabic")

    lang.change(toggle_translation, lang, trans_out)
    submit.click(process, [text_input, lang], [summary_out, trans_out, gallery])
|
|
|
demo.launch() |
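# launch() serves the app locally; launch(share=True) would also create a temporary public link.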
|
|