# Author: Mueris
# Last commit: "Update app.py" (9d3d8d6, verified)
import gradio as gr
from PIL import Image
import torch
import os
# Load TAMGA VQA model
TAMGA_REPO = "Mueris/TurkishVLMTAMGA"

# The `inference` module is treated as optional. Checking
# `'load_for_inference' in globals()` after an unconditional import can never
# take the fallback path (a failed import raises first), so the import itself
# is guarded instead. When it fails, placeholders are installed; `answer`
# checks `tamga_model is None` before it ever calls `predict`.
try:
    from inference import load_for_inference, predict
except ImportError:
    print("Warning: inference.py functions not loaded. Using placeholder values.")
    tamga_model, tamga_tokenizer, tamga_device = None, None, 'cpu'
else:
    tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)
# Load BLIP Caption Model
from transformers import BlipForConditionalGeneration, BlipProcessor

CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"

# The processor is taken from the base BLIP checkpoint, while the model
# weights are loaded from CAPTION_REPO.
caption_processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO)

# Move the model to the GPU when CUDA is available, otherwise keep it on CPU.
if torch.cuda.is_available():
    caption_model.to("cuda")
else:
    caption_model.to("cpu")

# Remember where the model lives so inputs can be moved to the same device.
caption_device = caption_model.device
# Utility Functions
def toggle_question_input(model_choice):
    """Build the UI updates that follow a change of the selected model.

    Args:
        model_choice: Label of the currently selected model.

    Returns:
        A pair of ``gr.update`` objects. The first sets ``visible`` to True
        when ``model_choice`` is "TAMGA VQA" and to False otherwise. The
        second is an empty update for "TAMGA VQA" and resets ``value`` to an
        empty string for any other choice.
    """
    vqa_selected = model_choice == "TAMGA VQA"
    question_update = gr.update() if vqa_selected else gr.update(value="")
    return gr.update(visible=vqa_selected), question_update
def select_quick_question(quick_question):
    """Turn a quick-question selection into a pair of UI updates.

    Args:
        quick_question: The selected preset question, or a falsy value when
            nothing is selected.

    Returns:
        A pair of ``gr.update`` objects. With a selection, the first carries
        the question as ``value`` and the second sets ``value`` to None.
        Without a selection, both are empty updates.
    """
    if quick_question:
        return gr.update(value=quick_question), gr.update(value=None)
    return gr.update(), gr.update()
# Main Inference Function
def answer(model_choice, image, question):
    """Run the selected model on an image and return a Markdown message.

    Args:
        model_choice: Model label; "TAMGA VQA" and
            "BLIP Caption (Fine-Tuned)" are the two handled values.
        image: Array passed to ``Image.fromarray``, or None when no image
            was provided.
        question: Question text for the VQA model. May be None or blank;
            it is ignored by the captioning model.

    Returns:
        A Markdown string: the model's answer or caption, or a user-facing
        message when the image or question is missing, the VQA model is not
        loaded, or ``model_choice`` is not recognised.
    """
    if image is None:
        return "**Lütfen bir görsel yükleyin.**"

    # MODEL 1 — VQA
    if model_choice == "TAMGA VQA":
        # Treat a missing question (None) like a blank one instead of
        # raising AttributeError on `.strip()`.
        if not (question or "").strip():
            return "**Bu model soru gerektirir. Lütfen bir soru yazın.**"
        # The model is None when the inference helpers could not be loaded.
        if tamga_model is None:
            return "**Hata: TAMGA VQA modeli yüklenemedi. 'inference.py' dosyasını ve bağımlılıkları kontrol edin.**"
        pil_image = Image.fromarray(image)
        response = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
        return f"**Cevap:** {response}"

    # MODEL 2 — Captioning
    if model_choice == "BLIP Caption (Fine-Tuned)":
        pil_image = Image.fromarray(image)
        # Inputs must live on the same device as the caption model.
        inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
        output = caption_model.generate(**inputs, max_new_tokens=64)
        caption = caption_processor.decode(output[0], skip_special_tokens=True)
        return f"**Açıklama:** {caption}"

    return "**Model bulunamadı.**"
# Custom stylesheet handed to `gr.Blocks(css=css)`.
# The selectors target the ids/classes assigned in the UI layout below:
# `#col-container`, `#title`, `#subtitle`, `#image-examples`,
# `#quick-questions`, `#quick-questions-label` and `.output-box`.
# The text inside the literal (including its CSS comments) is runtime data
# and is kept exactly as written.
css = """
#col-container {
max-width: 1100px;
margin-left: auto;
margin-right: auto;
}
.gradio-container {
background-color: #ffffff !important;
}
h1, h2, h3, p, label {
color: #000000 !important;
}
/* VIBRANT ACCENT COLOR DEFINITION */
:root {
--gradio-primary-500: #1E90FF; /* Dodger Blue - Vibrant Accent */
}
#title {
text-align: center;
font-size: 2.2rem;
font-weight: bold;
margin-top: 20px;
color: #1E90FF !important; /* Apply vibrant color to title */
}
#subtitle {
text-align: center;
font-size: 1.1rem;
color: #666666 !important; /* Darker grey subtitle */
margin-bottom: 25px;
}
/* Modern Input and Dropdown Styling */
.wrap, .input_text, .scroll-hide, .gradio-dropdown {
border-radius: 8px !important;
}
/* Primary Button Styling */
button[variant="primary"] {
background-color: #1E90FF !important;
border-color: #1E90FF !important;
color: white !important;
transition: all 0.2s ease-in-out;
border-radius: 8px !important;
box-shadow: 0 4px 10px rgba(30, 144, 255, 0.4); /* Stronger shadow */
font-weight: bold;
padding: 10px 20px;
}
button[variant="primary"]:hover {
background-color: #0080FF !important;
}
/* Output Box Styling */
.output-box {
background-color: #fcfcfc !important; /* Very subtle background */
border-radius: 12px; /* More rounded */
padding: 20px;
border: 1px solid #bbdffc; /* Light blue border matching the accent */
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08); /* Noticeable shadow */
color: #000000 !important;
font-size: 1.1rem;
min-height: 200px;
}
/* Custom CSS for image examples */
#image-examples .thumbnail-item {
max-width: 100px !important;
height: auto;
}
/* --- Quick Questions Radio Button Styling (Matching User Image) --- */
#quick-questions-label {
margin-bottom: 8px;
font-weight: 600;
}
#quick-questions > label {
display: block; /* Sadece başlığı değil, tüm radyo grubunu hedefler */
}
#quick-questions > label > div > fieldset {
display: flex; /* Düğmeleri yatay hizalar */
flex-wrap: wrap; /* Gerekirse alt satıra geçmesini sağlar */
gap: 8px; /* Düğmeler arasında boşluk bırakır */
padding: 0;
margin: 0;
border: none;
}
/* Radyo Düğmesi Görünümü */
#quick-questions label.radio {
border: 1px solid #ddd;
border-radius: 8px;
padding: 8px 12px;
cursor: pointer;
transition: all 0.2s ease-in-out;
background-color: #f9f9f9;
}
/* Seçili Radyo Düğmesi Görünümü */
#quick-questions input[type="radio"]:checked + span {
color: white; /* Seçili metni beyaz yapar */
background-color: #1E90FF; /* Vibrant Accent rengini uygular */
border-color: #1E90FF;
box-shadow: 0 2px 5px rgba(30, 144, 255, 0.3);
}
/* Radyo düğmesi input'unu gizle */
#quick-questions input[type="radio"] {
display: none;
}
/* Metin kutusunu ayarla */
#quick-questions label.radio span {
padding: 0;
margin: 0;
display: inline-block;
color: #333;
font-weight: 500;
line-height: 1.2;
}
#quick-questions input[type="radio"]:checked + span {
color: white;
}
/* ----------------------------------------------------------------- */
"""
# VQA question examples: preset Turkish questions offered as the choices of
# the "quick questions" radio group.
VQA_QUESTION_CHOICES = [
    "Bu görselde kaç tane insan figürü var?",
    "Görselde ne görüyorsun?",
    "Fotoğrafta ne tür bir araç görülüyor?",
    "Bu görselde hava aracı var mı?",
]
# UI Layout
# Two-column page: inputs (model selector, image, VQA question controls, run
# button) on the left and the Markdown result on the right. Event wiring is
# declared inside the same Blocks context.
# NOTE(review): block nesting below was reconstructed from a flattened source;
# confirm it matches the intended layout.
with gr.Blocks(css=css) as demo:
    # Page header; the ids are styled by the `#title` / `#subtitle` CSS rules.
    gr.HTML("<div id='title'>🇹🇷 TAMGA — Çok Modelli Türkçe Görsel Dil Modeli</div>")
    gr.HTML("<div id='subtitle'>TAMGA VQA (Soru Cevap) veya TAMGA Görsel Açıklama modellerinden birini seçin.</div>")
    with gr.Row(elem_id="col-container"):
        # LEFT SIDE
        with gr.Column(scale=1):
            # The choice strings must match the ones compared in `answer`
            # and `toggle_question_input`.
            model_choice = gr.Dropdown(
                choices=["TAMGA VQA", "BLIP Caption (Fine-Tuned)"],
                value="TAMGA VQA",
                label="🔧 Model Seç"
            )
            # type="numpy": `answer` converts the value with Image.fromarray.
            image = gr.Image(type="numpy", label="📷 Görsel Yükle")
            # --- Image Examples ---
            # Clickable sample images that populate the image input.
            gr.Examples(
                examples=["example01.jpg", "example02.jpg", "example03.jpg", "example04.jpeg"],
                inputs=image,
                label="Örnek Görseller (Seçmek için tıklayın)",
                elem_id="image-examples"
            )
            # ----------------------
            # VQA-only inputs; the group's visibility is toggled by
            # `toggle_question_input` when the model selection changes.
            with gr.Group(visible=True) as vqa_inputs_group:
                question = gr.Textbox(
                    label="Soru (Sadece VQA Modeli İçin)",
                    placeholder="Örn: Bu platform ne sınıf bir araçtır?"
                )
                gr.HTML("<div id='quick-questions-label'>Hızlı Sorular (Seçmek için tıklayın)</div>")
                quick_question_radio = gr.Radio(
                    choices=VQA_QUESTION_CHOICES,
                    label="Hızlı Sorular",
                    value=None,
                    elem_id="quick-questions",
                    container=False
                )
            # --------------------------------------
            submit_btn = gr.Button("Çalıştır", variant="primary")
        # RIGHT SIDE
        with gr.Column(scale=1):
            # Result area; styled by the `.output-box` CSS rule.
            output = gr.Markdown(elem_classes="output-box")
    # Button click runs the selected model and writes its Markdown result.
    submit_btn.click(
        fn=answer,
        inputs=[model_choice, image, question],
        outputs=output
    )
    # --- Dynamic Visibility Control ---
    # Show/hide the VQA inputs group and update the question box whenever
    # the model selection changes.
    model_choice.change(
        fn=toggle_question_input,
        inputs=[model_choice],
        outputs=[vqa_inputs_group, question],
        queue=False
    )
    # --- Quick Question Selection Logic ---
    # Client-side handler (fn=None, so no Python function is called): when a
    # quick question is selected it is copied into the question box and the
    # radio is reset to null; when the radio value is empty the current
    # question text is kept unchanged.
    js_func = """
(val, curr_text) => {
if (val) {
return [val, null];
}
return [curr_text, null];
}
"""
    quick_question_radio.change(
        fn=None,
        inputs=[quick_question_radio, question],
        outputs=[question, quick_question_radio],
        js=js_func,
        show_progress="hidden"
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()