ysharma HF staff committed on
Commit
f9b4404
1 Parent(s): 8ca72f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -13
app.py CHANGED
@@ -6,29 +6,39 @@ import torch
6
  from PIL import Image
7
  from transformers import AutoProcessor, LlavaForConditionalGeneration
8
  from transformers import TextIteratorStreamer
9
-
10
  import spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
  PLACEHOLDER = """
14
  <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
15
- <img src="https://cdn-uploads.huggingface.co/production/uploads/64ccdc322e592905f922a06e/DDIW0kbWmdOQWwy4XMhwX.png" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
16
- <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">LLaVA-Llama-3-8B</h1>
17
- <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Llava-Llama-3-8b is a LLaVA model fine-tuned from Meta-Llama-3-8B-Instruct and CLIP-ViT-Large-patch14-336 with ShareGPT4V-PT and InternVL-SFT by XTuner</p>
18
  </div>
19
  """
20
 
21
 
22
  model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"
23
-
24
  processor = AutoProcessor.from_pretrained(model_id)
25
-
26
  model = LlavaForConditionalGeneration.from_pretrained(
27
  model_id,
28
  torch_dtype=torch.float16,
29
  low_cpu_mem_usage=True,
30
  )
31
-
32
  model.to("cuda:0")
33
  model.generation_config.eos_token_id = 128009
34
 
@@ -88,10 +98,10 @@ def bot_streaming(message, history):
88
 
89
  chatbot=gr.Chatbot(placeholder=PLACEHOLDER,scale=1)
90
  chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
91
- with gr.Blocks(fill_height=True, ) as demo:
92
  gr.ChatInterface(
93
  fn=bot_streaming,
94
- title="LLaVA Llama-3-8B",
95
  examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
96
  {"text": "How to make this pastry?", "files": ["./baklava.png"]}],
97
  description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
@@ -99,7 +109,66 @@ with gr.Blocks(fill_height=True, ) as demo:
99
  multimodal=True,
100
  textbox=chat_input,
101
  chatbot=chatbot,
102
- )
103
-
104
- demo.queue(api_open=False)
105
- demo.launch(show_api=False, share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from PIL import Image
7
  from transformers import AutoProcessor, LlavaForConditionalGeneration
8
  from transformers import TextIteratorStreamer
9
+ from datasets import load_dataset
10
  import spaces
11
+ import pandas as pd
12
+
13
# Load the "test" split of the Vibe-Eval dataset once at import time.
# `df` keeps the records as loaded; `df_markdown` is an independent copy that
# can be reformatted for display without touching `df`.
rekaeval = "RekaAI/VibeEval"
dataset = load_dataset(
    rekaeval,
    split="test",
)
df = pd.DataFrame(dataset)
df_markdown = df.copy()
17
+
18
+ # Function to convert URL to HTML img tag
19
def mediaurl_to_img_tag(url):
    """Return an HTML ``<img>`` tag whose ``src`` attribute is *url*.

    The URL is HTML-escaped before it is placed inside the double-quoted
    attribute, so a value containing ``"``, ``<``, ``>`` or ``&`` cannot
    terminate the attribute early or inject extra markup.

    Args:
        url: The media URL (converted with ``str`` before escaping).

    Returns:
        A string of the form ``<img src="...">``.
    """
    # Imported locally so the function does not depend on a module-level import.
    from html import escape

    return f'<img src="{escape(str(url), quote=True)}">'
21
+
22
# Replace every raw media URL in the display copy with its <img> tag so the
# column can be rendered as markup.
df_markdown['media_url'] = df_markdown['media_url'].map(mediaurl_to_img_tag)
24
 
25
 
26
# HTML passed to the chatbot component as its placeholder.
# The image URL no longer carries the stray ")" left over from Markdown link
# syntax, and the opacity is set through CSS ("opacity" is not an <img>
# attribute, so the original value had no effect).
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <img src="https://avatars.githubusercontent.com/u/51063788?s=400&u=479ecc9d93d8a373b5c2e69ebe846f394811e94a&v=4" style="width: 40%; opacity: 0.45;">
   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">LLaVA-Llama3-8B With REKA Vibe-Eval</h1>
   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Test your Vision LLMs with new Vibe-Evals from REKA</p>
</div>
"""
33
 
34
 
35
# Checkpoint used for both the processor and the model.
model_id = "xtuner/llava-llama-3-8b-v1_1-transformers"

processor = AutoProcessor.from_pretrained(model_id)

# Load the weights in half precision with reduced CPU memory usage, then move
# the model to the first CUDA device.
model = LlavaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True
)
model.to("cuda:0")

# NOTE(review): 128009 is presumably the end-of-turn token id of the Llama-3
# tokenizer -- confirm against the checkpoint's tokenizer config.
model.generation_config.eos_token_id = 128009
44
 
 
98
 
99
# Chat components created up front so they can be handed to the chat
# interface as its `chatbot` and `textbox`.
chatbot = gr.Chatbot(placeholder=PLACEHOLDER, scale=1)
chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["image"],
    placeholder="Enter message or upload file...",
    show_label=False,
)
101
+ tmp = '''with gr.Blocks(fill_height=True, ) as demo:
102
  gr.ChatInterface(
103
  fn=bot_streaming,
104
+ title="Testing LLaVA-Llama3-8b with Reka's Vibe-Eval",
105
  examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
106
  {"text": "How to make this pastry?", "files": ["./baklava.png"]}],
107
  description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
 
109
  multimodal=True,
110
  textbox=chat_input,
111
  chatbot=chatbot,
112
+ )'''
113
+
114
# Number of Vibe-Eval rows shown per page of the examples table.
_PAGE_SIZE = 5

with gr.Blocks() as demo:
    with gr.Row():
        # Left column: the multimodal chat with the LLaVA model.
        with gr.Column():
            gr.ChatInterface(
                fn=bot_streaming,
                title="Testing LLaVA-Llama3-8b with Reka's Vibe-Eval",
                description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
                stop_btn="Stop Generation",
                multimodal=True,
                textbox=chat_input,
                chatbot=chatbot,
            )
        # Right column: a paged view of the Vibe-Eval examples.
        with gr.Column():
            with gr.Row():
                # The table opens on the first page, so "Previous" starts
                # disabled; display_next() enables it.
                b1 = gr.Button("Previous", interactive=False)
                b2 = gr.Button("Next")
            reka = gr.Dataframe(value=df_markdown[0:_PAGE_SIZE], datatype=['markdown', 'str'], wrap=False, interactive=False, height=500)
            # Hidden counters holding the first and last row label of the
            # page currently displayed in `reka`.
            num_start = gr.Number(visible=False, value=0)
            num_end = gr.Number(visible=False, value=_PAGE_SIZE - 1)

    # NOTE(review): add_message, bot and print_like_dislike are not defined in
    # the part of the file visible here -- confirm they exist before launch,
    # and that this extra submit chain is wanted in addition to the handlers
    # the chat interface sets up for the same textbox.
    chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
    bot_msg = chat_msg.then(bot, chatbot, chatbot, api_name="bot_response")
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])
    chatbot.like(print_like_dislike, None, None)

    def get_example(reka, start, evt: gr.SelectData):
        """Build a chat-input value from the table row the user clicked.

        Args:
            reka: Current contents of the examples table (unused).
            start: Row label of the first row on the displayed page.
            evt: Selection event; ``evt.index[0]`` is the clicked row counted
                from the top of the displayed page.

        Returns:
            A dict with the example's prompt under ``"text"`` and its image
            under ``"files"``.
        """
        print(f'evt.value = {evt.value}')
        print(f'evt.index = {evt.index}')
        # The click position is relative to the visible page, so add the page
        # offset to address the right row of the full dataset.
        x = int(start or 0) + evt.index[0]
        # Assumes the first two columns of `df` are the media URL and the
        # prompt -- TODO confirm against the dataset schema.
        image = df.iloc[x, 0]
        prompt = df.iloc[x, 1]
        print(f'image = {image}')
        print(f'prompt = {prompt}')
        return {"text": prompt, "files": [image]}

    def display_next(dataframe, end):
        """Return the page of examples that follows the one ending at *end*.

        If the displayed page is already the last one, the same page is
        returned instead of an empty table.

        Args:
            dataframe: Current contents of the examples table; only its
                length is used, as a fallback when *end* is empty.
            end: Row label of the last row on the displayed page.

        Returns:
            ``(page, end, start, previous_button)`` matching the outputs
            ``[reka, num_end, num_start, b1]``.
        """
        print(f'initial value of end = {end}')
        # The hidden number may arrive as a float; row labels are integers.
        current_end = int(end) if end else len(dataframe) - 1
        start = current_end + 1
        if start >= len(df_markdown):
            # Already on the last page: stay on it.
            start = max(current_end - (_PAGE_SIZE - 1), 0)
        end = start + _PAGE_SIZE - 1
        # .loc slices by label and includes both endpoints.
        df_images = df_markdown.loc[start:end]
        print(f'returned value of end = {end}')
        print(f'returned value of start = {start}')
        return df_images, end, start, gr.Button(interactive=start > 0)

    def display_previous(dataframe, start):
        """Return the page of examples that precedes the one starting at *start*.

        The window never moves before row 0, and always spans the same number
        of rows as a page produced by ``display_next``.

        Args:
            dataframe: Current contents of the examples table (unused).
            start: Row label of the first row on the displayed page.

        Returns:
            ``(page, end, start, previous_button)`` matching the outputs
            ``[reka, num_end, num_start, b1]``; the button is disabled once
            the first page is reached.
        """
        print(f'initial value of start = {start}')
        current_start = int(start or 0)
        start = max(current_start - _PAGE_SIZE, 0)
        end = start + _PAGE_SIZE - 1
        # .loc includes both endpoints, so start..end is exactly one page.
        df_images = df_markdown.loc[start:end]
        print(f'returned value of start = {start}')
        print(f'returned value of end = {end}')
        return df_images, end, start, gr.Button(interactive=start > 0)

    # Clicking a table row copies that example into the chat input; the page
    # offset is passed along so the right dataset row is used.
    reka.select(get_example, [reka, num_start], chat_input, show_progress="hidden")
    b2.click(fn=display_next, inputs=[reka, num_end], outputs=[reka, num_end, num_start, b1], api_name="next_rows", show_progress=False)
    b1.click(fn=display_previous, inputs=[reka, num_start], outputs=[reka, num_end, num_start, b1], api_name="previous_rows")
171
+
172
+
173
# Enable the request queue, then start the app with debug=True.
demo.queue()
demo.launch(
    debug=True,
)