vikhyatk commited on
Commit
5781b89
1 Parent(s): 952611b
Files changed (3) hide show
  1. README.md +5 -3
  2. app.py +60 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,15 @@
1
  ---
2
- title: Moondream2
3
- emoji: 🐢
4
  colorFrom: indigo
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: moondream2
3
+ emoji: 🌔
4
  colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ preload_from_hub:
12
+ - vikhyatk/moondream2
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import torch
3
+ import re
4
+ import gradio as gr
5
+ from threading import Thread
6
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
7
+
8
+ parser = argparse.ArgumentParser()
9
+
10
+ if torch.cuda.is_available():
11
+ device, dtype = "cuda", torch.float16
12
+ else:
13
+ device, dtype = "cpu", torch.float32
14
+
15
+ model_id = "vikhyatk/moondream2"
16
+ tokenizer = AutoTokenizer.from_pretrained(model_id, revision="2024-03-04")
17
+ moondream = AutoModelForCausalLM.from_pretrained(
18
+ model_id, trust_remote_code=True, revision="2024-03-04"
19
+ ).to(device=device, dtype=dtype)
20
+ moondream.eval()
21
+
22
+
23
+ def answer_question(img, prompt):
24
+ image_embeds = moondream.encode_image(img)
25
+ streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
26
+ thread = Thread(
27
+ target=moondream.answer_question,
28
+ kwargs={
29
+ "image_embeds": image_embeds,
30
+ "question": prompt,
31
+ "tokenizer": tokenizer,
32
+ "streamer": streamer,
33
+ },
34
+ )
35
+ thread.start()
36
+
37
+ buffer = ""
38
+ for new_text in streamer:
39
+ clean_text = re.sub("<$|<END$", "", new_text)
40
+ buffer += clean_text
41
+ yield buffer
42
+
43
+
44
+ with gr.Blocks() as demo:
45
+ gr.Markdown(
46
+ """
47
+ # 🌔 moondream2
48
+ A tiny vision language model. [GitHub](https://github.com/vikhyat/moondream)
49
+ """
50
+ )
51
+ with gr.Row():
52
+ prompt = gr.Textbox(label="Input", placeholder="Type here...", scale=4)
53
+ submit = gr.Button("Submit")
54
+ with gr.Row():
55
+ img = gr.Image(type="pil", label="Upload an Image")
56
+ output = gr.TextArea(label="Response")
57
+ submit.click(answer_question, [img, prompt], output)
58
+ prompt.submit(answer_question, [img, prompt], output)
59
+
60
+ demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ timm==0.9.12
2
+ transformers==4.36.2
3
+ einops==0.7.0