teowu committed
Commit bf92928
1 Parent(s): d506e15

Update app.py

Files changed (1):
  app.py: +23 -11
app.py CHANGED
@@ -11,7 +11,7 @@ model = AutoModelForCausalLM.from_pretrained("q-future/co-instruct-preview",
                                              attn_implementation="eager",
                                              device_map={"":"cuda:0"})
 
-def chat(message, history, image_1, image_2, image_3):
+def chat(message, history, image_1, image_2, image_3, image_4):
     print(history)
     if history:
         if image_1 is not None and image_2 is None:
@@ -28,11 +28,18 @@ def chat(message, history, image_1, image_2, image_3):
             message = past_message + "USER:" + message + " ASSISTANT:"
             images = [image_1, image_2]
         else:
-            past_message = "USER: The first image: <|image|>\nThe second image: <|image|>\nThe third image:<|image|>" + history[0][0] + " ASSISTANT:" + history[0][1] + "</s>"
-            for i in range((len(history) - 1)):
-                past_message += "USER:" + history[i][0] + " ASSISTANT:" + history[i][1] + "</s>"
-            message = past_message + "USER:" + message + " ASSISTANT:"
-            images = [image_1, image_2, image_3]
+            if image_4 is None:
+                past_message = "USER: The first image: <|image|>\nThe second image: <|image|>\nThe third image:<|image|>" + history[0][0] + " ASSISTANT:" + history[0][1] + "</s>"
+                for i in range((len(history) - 1)):
+                    past_message += "USER:" + history[i][0] + " ASSISTANT:" + history[i][1] + "</s>"
+                message = past_message + "USER:" + message + " ASSISTANT:"
+                images = [image_1, image_2, image_3]
+            else:
+                past_message = "USER: The first image: <|image|>\nThe second image: <|image|>\nThe third image:<|image|>\nThe fourth image:<|image|>" + history[0][0] + " ASSISTANT:" + history[0][1] + "</s>"
+                for i in range((len(history) - 1)):
+                    past_message += "USER:" + history[i][0] + " ASSISTANT:" + history[i][1] + "</s>"
+                message = past_message + "USER:" + message + " ASSISTANT:"
+                images = [image_1, image_2, image_3, image_4]
     else:
         if image_1 is not None and image_2 is None:
             message = "USER: The image: <|image|> " + message + " ASSISTANT:"
@@ -42,17 +49,21 @@ def chat(message, history, image_1, image_2, image_3):
             message = "USER: The first image: <|image|>\nThe second image: <|image|>" + message + " ASSISTANT:"
             images = [image_1, image_2]
         else:
-            message = "USER: The first image: <|image|>\nThe second image: <|image|>\nThe third image:<|image|>" + message + " ASSISTANT:"
-            images = [image_1, image_2, image_3]
+            if image_4 is None:
+                message = "USER: The first image: <|image|>\nThe second image: <|image|>\nThe third image:<|image|>" + message + " ASSISTANT:"
+                images = [image_1, image_2, image_3]
+            else:
+                message = "USER: The first image: <|image|>\nThe second image: <|image|>\nThe third image:<|image|>\nThe fourth image:<|image|>" + message + " ASSISTANT:"
+                images = [image_1, image_2, image_3, image_4]
 
     print(message)
 
-    return model.tokenizer.batch_decode(model.chat(message, images, max_new_tokens=150).clamp(0, 100000))[0].split("ASSISTANT:")[-1]
+    return model.tokenizer.batch_decode(model.chat(message, images, max_new_tokens=300).clamp(0, 100000))[0].split("ASSISTANT:")[-1]
 
 
 with gr.Blocks(title="img") as demo:
     title_markdown = ("""
-    <div align="center">*Preview Version (v1)! Now we support two images as inputs! Try it now!*</div>
+    <h3 align="center">*Super Version of Q-Instruct with Multi-image (up to 4, same as GPT-4V) Support!*</h3>
     <h1 align="center"><a href="https://github.com/Q-Future/Q-Instruct"><img src="https://github.com/Q-Future/Q-Instruct/blob/main/q_instruct_logo.png?raw=true", alt="Q-Instruct (mPLUG-Owl-2)" border="0" style="margin: 0 auto; height: 85px;" /></a> </h1>
     <h2 align="center">Q-Instruct: Improving Low-level Visual Abilities for Multi-modality Foundation Models</h2>
     <h5 align="center"> Please find our more accurate visual scoring demo on <a href='https://huggingface.co/spaces/teowu/OneScorer'>[OneScorer]</a>!</h2>
@@ -69,5 +80,6 @@ with gr.Blocks(title="img") as demo:
         input_img_1 = gr.Image(type='pil', label="Image 1 (First image)")
         input_img_2 = gr.Image(type='pil', label="Image 2 (Second image)")
         input_img_3 = gr.Image(type='pil', label="Image 3 (Third image)")
-    gr.ChatInterface(fn = chat, additional_inputs=[input_img_1, input_img_2, input_img_3])
+        input_img_4 = gr.Image(type='pil', label="Image 4 (Fourth image)")
+    gr.ChatInterface(fn = chat, additional_inputs=[input_img_1, input_img_2, input_img_3, input_img_4])
     demo.launch(share=True)
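For reference, a minimal sketch (not part of the commit) of the prompt string the new four-image branch assembles on a first turn; the user message below is invented for illustration, and the <|image|> placeholders are presumably filled, in order, from the images list by model.chat():

# Sketch only: mirrors the four-image prompt format built by the updated chat().
message = "Which of the four images has the best clarity?"  # hypothetical user input
prompt = ("USER: The first image: <|image|>\nThe second image: <|image|>\n"
          "The third image:<|image|>\nThe fourth image:<|image|>"
          + message + " ASSISTANT:")
print(prompt)
# USER: The first image: <|image|>
# The second image: <|image|>
# The third image:<|image|>
# The fourth image:<|image|>Which of the four images has the best clarity? ASSISTANT:

On later turns, chat() prefixes the accumulated USER/ASSISTANT exchanges (each closed with "</s>") before the new user message, as shown in the diff above.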
 
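And a standalone sketch of the Gradio wiring, under the assumption that gr.ChatInterface forwards each entry of additional_inputs as an extra positional argument after (message, history); the gr.Row() grouping and the stub chat() body are illustrative, not taken from the commit:

import gradio as gr

# Stub handler for illustration: the real app builds the <|image|> prompt and calls the model.
def chat(message, history, image_1, image_2, image_3, image_4):
    n = sum(img is not None for img in (image_1, image_2, image_3, image_4))
    return f"Received {n} image(s) with message: {message!r}"

with gr.Blocks(title="img") as demo:
    with gr.Row():
        # Four optional image slots, matching the updated chat() signature.
        input_imgs = [gr.Image(type="pil", label=f"Image {i}") for i in range(1, 5)]
    # additional_inputs are appended, in order, to the arguments passed to fn.
    gr.ChatInterface(fn=chat, additional_inputs=input_imgs)

demo.launch()

This is the same pattern the commit extends from three to four image inputs.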