Spaces:

Mueris
/

TurkishVLMTAMGAQA

Running

App Files Files Community

TurkishVLMTAMGAQA / app.py

Mueris

Update app.py

9d3d8d6 verified 15 days ago

raw

history blame contribute delete

8.68 kB

	import gradio as gr
	from PIL import Image
	import torch
	import os

	# Load TAMGA VQA model
	#
	# `inference` is a project-local module. The import is attempted inside a
	# try block so that, when it is missing or broken, the app still starts with
	# placeholder values instead of crashing at import time. `answer` checks
	# `tamga_model is None` before it ever calls `predict`, so leaving `predict`
	# undefined in the fallback path is safe.
	TAMGA_REPO = "Mueris/TurkishVLMTAMGA"

	try:
	    from inference import load_for_inference, predict
	except ImportError:
	    print("Warning: inference.py functions not loaded. Using placeholder values.")
	    tamga_model, tamga_tokenizer, tamga_device = None, None, 'cpu'
	else:
	    # Only reached when the import succeeded; errors raised while loading the
	    # model itself are deliberately not swallowed here.
	    tamga_model, tamga_tokenizer, tamga_device = load_for_inference(TAMGA_REPO)



	# Load BLIP Caption Model
	#
	# The processor comes from the Salesforce base checkpoint, while the model
	# weights come from CAPTION_REPO.
	from transformers import BlipForConditionalGeneration, BlipProcessor

	CAPTION_REPO = "Mueris/TurkishVLMTAMGA-CaptioningModel"
	caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

	# Use the GPU when torch reports one, otherwise stay on the CPU.
	_caption_target = "cuda" if torch.cuda.is_available() else "cpu"
	caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_REPO).to(_caption_target)

	# Read the device back from the model so inputs are moved to the same place.
	caption_device = caption_model.device



	# Utility Functions


	def toggle_question_input(model_choice):
	    """Build the Gradio updates that show or hide the VQA question inputs.

	    Args:
	        model_choice: The currently selected model name from the dropdown.

	    Returns:
	        A pair of ``gr.update`` objects. The first sets visibility (shown
	        only for "TAMGA VQA"); the second leaves its target untouched for
	        the VQA model and resets its value to an empty string otherwise.
	    """
	    if model_choice != "TAMGA VQA":
	        # Any non-VQA model: hide the inputs and clear the typed question.
	        return gr.update(visible=False), gr.update(value="")

	    return gr.update(visible=True), gr.update()

	def select_quick_question(quick_question):
	    """Turn a picked quick question into a pair of Gradio updates.

	    NOTE(review): no event in the visible part of this file calls this
	    function; the radio's change event uses a JavaScript handler instead.

	    Args:
	        quick_question: The selected quick-question text, or a falsy value
	            when nothing is selected.

	    Returns:
	        A pair of ``gr.update`` objects. With a selection, the first carries
	        the question text as its value and the second resets its value to
	        None; without one, both are no-op updates.
	    """
	    if quick_question:
	        return gr.update(value=quick_question), gr.update(value=None)

	    return gr.update(), gr.update()



	# Main Inference Function

	def answer(model_choice, image, question):
	    """Run the selected model on the uploaded image and return display text.

	    Args:
	        model_choice: Model name chosen in the UI; "TAMGA VQA" and
	            "BLIP Caption (Fine-Tuned)" are recognised.
	        image: The uploaded image in a form accepted by ``Image.fromarray``,
	            or None when nothing has been uploaded.
	        question: The user's question. Only used by the VQA model; may be
	            None or blank.

	    Returns:
	        A string for the output box: the prefixed model result, or a
	        user-facing message when input is missing, the VQA model is not
	        loaded, or the model name is unknown.
	    """
	    if image is None:
	        return "Lütfen bir görsel yükleyin."

	    # MODEL 1 - VQA
	    if model_choice == "TAMGA VQA":
	        # Treat a missing (None) question like an empty one so that
	        # `.strip()` cannot raise AttributeError.
	        if not (question or "").strip():
	            return "Bu model soru gerektirir. Lütfen bir soru yazın."

	        # The model is None when startup fell back to placeholder values.
	        if tamga_model is None:
	            return "Hata: TAMGA VQA modeli yüklenemedi. 'inference.py' dosyasını ve bağımlılıkları kontrol edin."

	        pil_image = Image.fromarray(image)
	        response = predict(tamga_model, tamga_tokenizer, tamga_device, pil_image, question)
	        return f"Cevap: {response}"

	    # MODEL 2 - Captioning
	    if model_choice == "BLIP Caption (Fine-Tuned)":
	        pil_image = Image.fromarray(image)
	        # Inputs must live on the same device as the caption model.
	        inputs = caption_processor(images=pil_image, return_tensors="pt").to(caption_device)
	        output = caption_model.generate(**inputs, max_new_tokens=64)
	        caption = caption_processor.decode(output[0], skip_special_tokens=True)
	        return f"Açıklama: {caption}"

	    return "Model bulunamadı."



	# CSS
	#
	# Custom stylesheet for the page, passed to `gr.Blocks(css=css)` below.
	# It styles the centered container (#col-container), the title and subtitle
	# (#title, #subtitle), primary buttons, the result panel (.output-box), the
	# example thumbnails (#image-examples) and the quick-question radio group
	# (#quick-questions). The whole string is runtime text: everything inside
	# it, including the CSS comments, is sent to the browser as-is.
	# NOTE(review): the `#quick-questions input[type="radio"]:checked + span`
	# rule appears twice; the second occurrence only repeats `color: white`.

	css = """
	#col-container {
	max-width: 1100px;
	margin-left: auto;
	margin-right: auto;
	}
	.gradio-container {
	background-color: #ffffff !important;
	}
	h1, h2, h3, p, label {
	color: #000000 !important;
	}

	/* VIBRANT ACCENT COLOR DEFINITION */
	:root {
	--gradio-primary-500: #1E90FF; /* Dodger Blue - Vibrant Accent */
	}

	#title {
	text-align: center;
	font-size: 2.2rem;
	font-weight: bold;
	margin-top: 20px;
	color: #1E90FF !important; /* Apply vibrant color to title */
	}
	#subtitle {
	text-align: center;
	font-size: 1.1rem;
	color: #666666 !important; /* Darker grey subtitle */
	margin-bottom: 25px;
	}

	/* Modern Input and Dropdown Styling */
	.wrap, .input_text, .scroll-hide, .gradio-dropdown {
	border-radius: 8px !important;
	}

	/* Primary Button Styling */
	button[variant="primary"] {
	background-color: #1E90FF !important;
	border-color: #1E90FF !important;
	color: white !important;
	transition: all 0.2s ease-in-out;
	border-radius: 8px !important;
	box-shadow: 0 4px 10px rgba(30, 144, 255, 0.4); /* Stronger shadow */
	font-weight: bold;
	padding: 10px 20px;
	}
	button[variant="primary"]:hover {
	background-color: #0080FF !important;
	}

	/* Output Box Styling */
	.output-box {
	background-color: #fcfcfc !important; /* Very subtle background */
	border-radius: 12px; /* More rounded */
	padding: 20px;
	border: 1px solid #bbdffc; /* Light blue border matching the accent */
	box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08); /* Noticeable shadow */
	color: #000000 !important;
	font-size: 1.1rem;
	min-height: 200px;
	}

	/* Custom CSS for image examples */
	#image-examples .thumbnail-item {
	max-width: 100px !important;
	height: auto;
	}

	/* --- Quick Questions Radio Button Styling (Matching User Image) --- */
	#quick-questions-label {
	margin-bottom: 8px;
	font-weight: 600;
	}
	#quick-questions > label {
	display: block; /* Sadece başlığı değil, tüm radyo grubunu hedefler */
	}

	#quick-questions > label > div > fieldset {
	display: flex; /* Düğmeleri yatay hizalar */
	flex-wrap: wrap; /* Gerekirse alt satıra geçmesini sağlar */
	gap: 8px; /* Düğmeler arasında boşluk bırakır */
	padding: 0;
	margin: 0;
	border: none;
	}

	/* Radyo Düğmesi Görünümü */
	#quick-questions label.radio {
	border: 1px solid #ddd;
	border-radius: 8px;
	padding: 8px 12px;
	cursor: pointer;
	transition: all 0.2s ease-in-out;
	background-color: #f9f9f9;
	}

	/* Seçili Radyo Düğmesi Görünümü */
	#quick-questions input[type="radio"]:checked + span {
	color: white; /* Seçili metni beyaz yapar */
	background-color: #1E90FF; /* Vibrant Accent rengini uygular */
	border-color: #1E90FF;
	box-shadow: 0 2px 5px rgba(30, 144, 255, 0.3);
	}

	/* Radyo düğmesi input'unu gizle */
	#quick-questions input[type="radio"] {
	display: none;
	}

	/* Metin kutusunu ayarla */
	#quick-questions label.radio span {
	padding: 0;
	margin: 0;
	display: inline-block;
	color: #333;
	font-weight: 500;
	line-height: 1.2;
	}
	#quick-questions input[type="radio"]:checked + span {
	color: white;
	}
	/* ----------------------------------------------------------------- */
	"""


	# VQA question examples: preset Turkish questions offered as one-click
	# choices in the quick-question radio group.
	VQA_QUESTION_CHOICES = [
	    "Bu görselde kaç tane insan figürü var?",
	    "Görselde ne görüyorsun?",
	    "Fotoğrafta ne tür bir araç görülüyor?",
	    "Bu görselde hava aracı var mı?",
	]



	# UI Layout
	#
	# Two-column page: model picker, image upload, examples, question inputs and
	# the run button on the left; the result panel on the right. Event wiring
	# follows the layout, still inside the Blocks context.
	# NOTE(review): the nesting below was inferred from the section comments and
	# the context managers; confirm that the quick-question widgets belong inside
	# the VQA group and that the run button sits in the left column.
	with gr.Blocks(css=css) as demo:

	    gr.HTML("<div id='title'>🇹🇷 TAMGA — Çok Modelli Türkçe Görsel Dil Modeli</div>")
	    gr.HTML("<div id='subtitle'>TAMGA VQA (Soru Cevap) veya TAMGA Görsel Açıklama modellerinden birini seçin.</div>")

	    with gr.Row(elem_id="col-container"):

	        # Left column: inputs
	        with gr.Column(scale=1):

	            model_choice = gr.Dropdown(
	                choices=["TAMGA VQA", "BLIP Caption (Fine-Tuned)"],
	                value="TAMGA VQA",
	                label="🔧 Model Seç",
	            )

	            image = gr.Image(type="numpy", label="📷 Görsel Yükle")

	            # Clickable sample images that fill the image input.
	            gr.Examples(
	                examples=["example01.jpg", "example02.jpg", "example03.jpg", "example04.jpeg"],
	                inputs=image,
	                label="Örnek Görseller (Seçmek için tıklayın)",
	                elem_id="image-examples",
	            )

	            # Question widgets, grouped so they can be shown/hidden together.
	            with gr.Group(visible=True) as vqa_inputs_group:
	                question = gr.Textbox(
	                    label="Soru (Sadece VQA Modeli İçin)",
	                    placeholder="Örn: Bu platform ne sınıf bir araçtır?",
	                )

	                gr.HTML("<div id='quick-questions-label'>Hızlı Sorular (Seçmek için tıklayın)</div>")

	                quick_question_radio = gr.Radio(
	                    choices=VQA_QUESTION_CHOICES,
	                    label="Hızlı Sorular",
	                    value=None,
	                    elem_id="quick-questions",
	                    container=False,
	                )

	            submit_btn = gr.Button("Çalıştır", variant="primary")

	        # Right column: model output
	        with gr.Column(scale=1):

	            output = gr.Markdown(elem_classes="output-box")

	    # Run the selected model when the button is clicked.
	    submit_btn.click(
	        fn=answer,
	        inputs=[model_choice, image, question],
	        outputs=output,
	    )

	    # Show or hide the question inputs whenever the model selection changes.
	    model_choice.change(
	        fn=toggle_question_input,
	        inputs=[model_choice],
	        outputs=[vqa_inputs_group, question],
	        queue=False,
	    )

	    # Browser-side handler for the quick-question radio: a picked value is
	    # copied into the question box, otherwise the current text is kept; the
	    # radio is reset to null in both cases. The string is kept verbatim.
	    js_func = """
	(val, curr_text) => {
	if (val) {
	return [val, null];
	}
	return [curr_text, null];
	}
	"""

	    quick_question_radio.change(
	        fn=None,
	        inputs=[quick_question_radio, question],
	        outputs=[question, quick_question_radio],
	        js=js_func,
	        show_progress="hidden",
	    )



	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()