Commit 4a5cedb by jbilcke-hf (HF staff)
Parent: e2a5026

Update app.py

Files changed (1): app.py (+43, -20)
app.py CHANGED
@@ -1,11 +1,27 @@
-import spaces
 import torch
 import re
+import os
 import gradio as gr
 from threading import Thread
 from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
-
 import subprocess
+from PIL import Image
+from io import BytesIO
+import base64
+
+SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')
+
+# Regex pattern to match data URI scheme
+data_uri_pattern = re.compile(r'data:image/(png|jpeg|jpg|webp);base64,')
+
+def readb64(b64):
+    # Remove any data URI scheme prefix with regex
+    b64 = data_uri_pattern.sub("", b64)
+    # Decode and open the image with PIL
+    img = Image.open(BytesIO(base64.b64decode(b64)))
+    return img
+
+
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 model_id = "vikhyatk/moondream2"
@@ -18,11 +34,17 @@ moondream = AutoModelForCausalLM.from_pretrained(
 )
 moondream.eval()
 
-
-@spaces.GPU(duration=10)
-def answer_question(img, prompt):
+def answer_question(secret_token, input, prompt):
+    if secret_token != SECRET_TOKEN:
+        raise gr.Error(
+            f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
+
+    img = readb64(input)
+
     image_embeds = moondream.encode_image(img)
+
     streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+
     thread = Thread(
         target=moondream.answer_question,
         kwargs={
@@ -37,23 +59,24 @@ def answer_question(img, prompt):
     buffer = ""
     for new_text in streamer:
         buffer += new_text
-        yield buffer.strip()
+
+    buffer.strip()
 
+    return buffer
 
 with gr.Blocks() as demo:
-    gr.Markdown(
-        """
-        # 🌔 moondream2
-        A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)
-        """
-    )
-    with gr.Row():
-        prompt = gr.Textbox(label="Input", value="Describe this image.", scale=4)
-        submit = gr.Button("Submit")
-    with gr.Row():
-        img = gr.Image(type="pil", label="Upload an Image")
-        output = gr.TextArea(label="Response")
-    submit.click(answer_question, [img, prompt], output)
-    prompt.submit(answer_question, [img, prompt], output)
+    gr.HTML("""
+    <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
+      <div style="text-align: center; color: black;">
+        <p style="color: black;">This space is a headless component of the cloud rendering engine used by AiTube.</p>
+        <p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/vikhyatk/moondream2" target="_blank">original space</a>.</p>
+      </div>
+    </div>""")
+    token = gr.Textbox()
+    input = gr.Textbox()
+    prompt = gr.Textbox()
+    submit = gr.Button()
+    output = gr.Textbox()
+    submit.click(answer_question, [token, input, prompt], output)
 
 demo.queue().launch()
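
Note on calling the updated endpoint: after this commit, answer_question no longer takes a PIL image. It expects the secret token (checked against the SECRET_TOKEN environment variable), the image as a base64 string with an optional data:image/...;base64, prefix, and the prompt, and it returns the full answer at once instead of streaming it. Below is a minimal sketch of how a caller might prepare that payload; the image path and token value are placeholders, not part of the commit.

import base64
from io import BytesIO

from PIL import Image


def to_data_uri(path: str) -> str:
    # Encode a local image as the kind of data-URI string readb64() accepts.
    img = Image.open(path).convert("RGB")
    buf = BytesIO()
    img.save(buf, format="PNG")
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{b64}"


# Hypothetical values for the three Gradio inputs (token, input, prompt).
payload = [
    "default_secret",             # must match SECRET_TOKEN on the deployed space
    to_data_uri("example.png"),   # placeholder image path
    "Describe this image.",
]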