Spaces:

MrOvkill
/

moondream-2-multi-interrogation

Runtime error

App Files Files Community

Sam committed on May 16

Commit

fb30cb6

•

1 Parent(s): 3b630ea

v0.1

Browse files

Files changed (3) hide show

README.md +7 -0
app.py +134 -0
requirements.txt +6 -0

README.md CHANGED Viewed

@@ -1,4 +1,11 @@
 ---
 license: mit
 title: Moondream 2 Multi Interrogation
 ---

 ---
 license: mit
 title: Moondream 2 Multi Interrogation
+emoji: 🌀
+colorFrom: yellow
+colorTo: purple
+sdk: gradio
+sdk_version: "4.31.3"
+app_file: app.py
+pinned: true
 ---

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import json
+import torch
+import requests
+import time
+import random
+from PIL import Image
+from typing import Union
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using {device}" if device != "cpu" else "Using CPU")
+def _load_model():
+  tokenizer = AutoTokenizer.from_pretrained("vikhyatk/moondream2", trust_remote_code=True, revision="2024-05-08")
+  model = AutoModelForCausalLM.from_pretrained("vikhyatk/moondream2", device_map=device, trust_remote_code=True, revision="2024-05-08")
+  return (model, tokenizer)
+class MoonDream():
+  def __init__(self, model=None, tokenizer=None):
+    self.model, self.tokenizer = (model, tokenizer)
+    if not model or not tokenizer:
+      self.model, self.tokenizer = _load_model()
+    self.device = device
+    self.model.to(self.device)
+  def __call__(self, question, imgs):
+    imn = 0
+    for img in imgs:
+      img = self.model.encode_image(img)
+      res = self.model.answer_question(question=question, image_embeds=img, tokenizer=self.tokenizer)
+      yield res
+    return
+def _respond_one(question, img):
+  txt = ""
+  yield (txt := txt + MoonDream()(question, [img]))
+  return txt
+def respond_batch(question, **imgs):
+  md = MoonDream()
+  for img in imgs.values():
+    res = md(question, img)
+    for r in res:
+      yield r
+    yield "\n\n\n\n\n\n"
+  return
+red = Image.new("RGB", (192,192), (255,0,0))
+green = Image.new("RGB", (192,192), (0,255,0))
+blue = Image.new("RGB", (192,192), (0,0,255))
+res = respond_batch("What color is this? Elaborate upon what emotion registers most strongly with you upon viewing. ", imgs=[red, green, blue])
+for r in res:
+  print(r)
+  if "\n\n\n\n\n\n" in r:
+    break
+def dual_images(img1: Image):
+  # Ran once for each img to it's respective output. Output should be detailed str of description/feature extraction/interrogation.
+  md = MoonDream()
+  res = md("Describe the image in plain english ", [img1])
+  txt = ""
+  for r in res:
+    yield (txt := txt + r)
+  return
+import os
+with open("together_key.txt", "r") as f:
+  os.environ["TOGETHER_KEY"] = f.read().strip()
+  print("Set together key")
+def merge_descriptions_to_prompt(mi, d1, d2):
+  from together import Together
+  tog = Together(api_key=os.getenv("TOGETHER_KEY"))
+  res = tog.completions.create(prompt=f"""Describe what would result if the following two descriptions were describing one thing.
+### Description 1:
+```text
+{d1}
+```
+### Description 2:
+```text
+{d2}
+```
+Merge-Specific Instructions:
+```text
+{mi}
+```
+Ensure you end your output with ```\\n
+---
+Complete Description:
+```text""", model="meta-llama/Meta-Llama-3-70B", stop=["```"], max_tokens=1024)
+  return res.choices[0].text.split("```")[0]
+def xform_image_description(img, inst):
+  from together import Together
+  desc = dual_images(img)
+  tog = Together(api_key=os.getenv("TOGETHER_KEY"))
+  prompt=f"""Describe the image in aggressively verbose detail. I must know every freckle upon a man's brow and each blade of the grass intimately.\nDescription: ```text\n{desc}\n```\nInstructions:\n```text\n{inst}\n```\n\n\n---\nDetailed Description:\n```text"""
+  res = tog.completions.create(prompt=prompt, model="meta-llama/Meta-Llama-3-70B", stop=["```"], max_tokens=1024)
+  return res.choices[0].text[len(prompt):].split("```")[0]
+# Gradio UI: a single-image description section, a two-image section, and a
+# section that merges the two resulting descriptions into one.
+with gr.Blocks() as demo:
+  with gr.Row(visible=True):
+    with gr.Column():
+      # --- Single image: input, trigger button, editable output ---
+      with gr.Row():
+        img = gr.Image(label="images", type='pil')
+      with gr.Row():
+        btn = gr.Button("submit")
+      with gr.Row():
+        otpt = gr.Textbox(label="output", lines=3, interactive=True)
+      # --- Two images side by side, described independently ---
+      with gr.Row():
+        with gr.Column():
+          im1 = gr.Image(label="image 1", type='pil')
+        with gr.Column():
+          im2 = gr.Image(label="image 2", type='pil')
+      with gr.Row():
+        btn2 = gr.Button("submit batch")
+      with gr.Row():
+        with gr.Column():
+          otp2 = gr.Textbox(label="individual batch output (left)", interactive=True)
+        with gr.Column():
+          otp3 = gr.Textbox(label="individual batch output (right)", interactive=True)
+      # --- Merge: free-text instructions, trigger button, combined output ---
+      with gr.Row():
+          minst = gr.Textbox(label="Merge Instructions")
+      with gr.Row():
+        btn_scd = gr.Button("Merge Descriptions to Single Combined Description")
+      with gr.Row():
+        otp4 = gr.Textbox(label="batch output ( combined )", interactive=True, lines=4)
+  # "submit batch" registers two handlers: dual_images runs once per image,
+  # writing to the left and right output boxes respectively.
+  btn2.click(dual_images, inputs=[im1], outputs=[otp2])
+  btn2.click(dual_images, inputs=[im2], outputs=[otp3])
+  # "submit" describes the single image into the first output box.
+  btn.click(dual_images, inputs=[img], outputs=[otpt])
+  # The merge button feeds the instructions and both description boxes to
+  # merge_descriptions_to_prompt and shows the result in the combined box.
+  btn_scd.click(merge_descriptions_to_prompt, inputs=[minst, otp2, otp3], outputs=[otp4])
+# Start the app at import time with debug mode on and a share link requested.
+demo.launch(debug=True, share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio==4.31.3
+transformers==4.40.2
+accelerate==0.30.1
+einops==0.8.0
+pillow==10.3.0
+together==1.1.5