Spaces:

Ghaymah
/

Vision-Q-A-GenAI-Ghaymah

Sleeping

App Files Files Community

Ahmed-El-Sharkawy committed on Sep 26

Commit

cdde84b

verified ·

1 Parent(s): 95e31de

Upload 3 files

Browse files

Files changed (3) hide show

app.py +126 -0
download.jpeg +0 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import base64
+import io
+import os
+from dotenv import load_dotenv
+from PIL import Image
+import gradio as gr
+from openai import OpenAI
+import re
+# Config
+# Load variables from a local .env file (if present) into the environment
+# before the os.getenv() lookups below read them.
+load_dotenv()
+# Display name and version; both appear in the page title and header banner.
+APP_Name    = os.getenv("APP_Name", "Ghaymah Vision QA")
+APP_Version = os.getenv("APP_Version", "1.0.0")
+# Credentials and endpoint passed to the OpenAI client created below.
+# API_KEY has no fallback, so it is None when the variable is unset.
+API_KEY     = os.getenv("API_KEY")
+# NOTE(review): the "disappear" fallback does not look like a usable URL —
+# presumably a scrubbed placeholder; confirm BASE_URL is always set on deploy.
+BASE_URL    = os.getenv("BASE_URL", "disappear")
+# Stylesheet handed to gr.Blocks; it styles the header banner
+# (logo image, title and subtitle classes used in the header HTML).
+CSS = """
+.app-header{display:flex;align-items:center;gap:12px;justify-content:center;margin:6px 0 16px}
+.app-header img{height:60px;border-radius:12px}
+.app-title{font-weight:800;font-size:28px;line-height:1.1}
+.app-sub{opacity:.7;font-size:14px}
+"""
+# Branding
+# Relative path of the logo file and the owner name shown under the title.
+COMPANY_LOGO = "download.jpeg"
+OWNER_NAME   = "ENG. Ahmed Yasser El Sharkawy"
+# Module-level client shared by every chat-completion request in this file.
+client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
+# Map PIL formats to MIME types
+PIL_TO_MIME = {
+    "JPEG": "image/jpeg",
+    "PNG": "image/png",
+    "WEBP": "image/webp",
+    "GIF": "image/gif",
+    "BMP": "image/bmp",
+    "TIFF": "image/tiff",
+}
+def encode_image_to_data_url(pil_image: Image.Image) -> str:
+    fmt = (pil_image.format or "PNG").upper()
+    mime = PIL_TO_MIME.get(fmt, "image/png")
+    buf = io.BytesIO()
+    if fmt == "JPEG" and pil_image.mode not in ("RGB", "L"):
+        pil_image = pil_image.convert("RGB")
+    pil_image.save(buf, format=fmt)
+    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
+    return f"data:{mime};base64,{b64}"
+def logo_data_uri(path: str) -> str:
+    if not os.path.exists(path):
+        return ""
+    ext = os.path.splitext(path)[1].lower()
+    mime = {
+        ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg",
+        ".webp": "image/webp", ".gif": "image/gif"
+    }.get(ext, "image/png")
+    with open(path, "rb") as f:
+        b64 = base64.b64encode(f.read()).decode("utf-8")
+    return f"data:{mime};base64,{b64}"
+def to_plain_text(s: str) -> str:
+    s = re.sub(r'\*\*(.*?)\*\*', r'\1', s)  # bold
+    s = re.sub(r'\*(.*?)\*', r'\1', s)      # italics
+    s = re.sub(r'`{1,3}(.*?)`{1,3}', r'\1', s, flags=re.S)  # code
+    s = re.sub(r'^\s*[-*]\s+', '• ', s, flags=re.M)         # bullets
+    return s
+def ask_image_question(image: Image.Image, question: str):
+    if image is None:
+        return "⚠️ Please upload an image first."
+    if not question or not question.strip():
+        question = "Describe this image."
+    try:
+        data_url = encode_image_to_data_url(image)
+        msg_content = [
+            {"type": "text", "text": question.strip()},
+            {"type": "image_url", "image_url": {"url": data_url}},
+        ]
+        resp = client.chat.completions.create(
+            model="gemma-3-4b-it",
+            messages=[{"role": "user", "content": msg_content}],
+            max_tokens=400,
+            temperature=0.2,
+        )
+        return to_plain_text(resp.choices[0].message.content or "")
+    except Exception as e:
+        return f"❌ Error: {e}"
+# Gradio UI
+# Page layout: a branded header on top, then a row with an input column
+# (image, question, button) and an output column (logo, answer box).
+with gr.Blocks(title=f"{APP_Name} v{APP_Version}", css=CSS) as demo:
+    # Embed the logo as a data URI; when logo_data_uri() returns "" the
+    # <img> tag is omitted and the header shows text only.
+    header_logo_src = logo_data_uri(COMPANY_LOGO)
+    logo_html = f"<img src='{header_logo_src}' alt='logo'>" if header_logo_src else ""
+    # Header banner; the class names match the rules defined in CSS.
+    gr.HTML(f"""
+    <div class="app-header">
+        {logo_html}
+        <div class="app-header-text">
+            <div class="app-title">{APP_Name}</div>
+            <div class="app-sub">v{APP_Version}  •  {OWNER_NAME}</div>
+        </div>
+    </div>
+    """)
+    with gr.Row():
+        # Left column: image -> question -> ask button
+        with gr.Column(scale=3):
+            # type="pil" so the handler receives a PIL image object.
+            image_in = gr.Image(type="pil", label="Upload image", sources=["upload", "clipboard"])
+            question_in = gr.Textbox(label="Your question",
+                                     placeholder="e.g., What objects do you see? What is happening?",
+                                     lines=3)
+            ask_btn = gr.Button("Ask", variant="primary")
+        # Right column: logo -> answer box
+        with gr.Column(scale=2, min_width=320):
+            # Show the logo image only when the file is actually present.
+            if os.path.exists(COMPANY_LOGO):
+                gr.Image(COMPANY_LOGO, show_label=False, container=False, height=96)
+            # Read-only box that receives the handler's return value.
+            answer_out = gr.Textbox(label="Answer", lines=14, interactive=False, show_copy_button=True)
+    # Clicking the button and submitting the question box run the same
+    # handler with the same inputs and write to the same output.
+    ask_btn.click(ask_image_question, [image_in, question_in], [answer_out])
+    question_in.submit(ask_image_question, [image_in, question_in], [answer_out])
+# Launch the app only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    demo.launch(debug=True)

download.jpeg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio
+openai
+python-dotenv
+pillow