vilarin committed
Commit b13c502
1 Parent(s): ad20a0f

Update app.py

Files changed (1)
  app.py +38 -81
app.py CHANGED
@@ -1,11 +1,12 @@
+ from threading import Thread
+ import torch
from PIL import Image
import gradio as gr
import spaces
+ from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer
import os
- from huggingface_hub import hf_hub_download
- import base64
- from llama_cpp import Llama
- from llama_cpp.llama_chat_format import Llava15ChatHandler
+
+

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
MODEL_LIST = ["openbmb/MiniCPM-Llama3-V-2_5","openbmb/MiniCPM-Llama3-V-2_5-int4"]
@@ -26,108 +27,64 @@ CSS = """
}
"""

- chat_handler = Llava15ChatHandler.from_pretrained(
-     repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
-     filename="*mmproj*",
- )
-
- llm = Llama.from_pretrained(
-     repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
-     filename="ggml-model-Q5_K_M.gguf",
-     chat_handler=chat_handler,
-     n_ctx=4096,
-     verbose=True
- )
+ model = AutoModel.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch.float16,
+     trust_remote_code=True
+ ).to(0)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+ model.eval()

- '''
- filenames = [
-     "*mmproj*",
-     "ggml-model-Q5_K_M.gguf"
- ]

- for filename in filenames:
-     downloaded_model_path = hf_hub_download(
-         repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
-         filename=filename,
-         local_dir="model"
-     )
- '''
-
-
- def image_to_base64_data_uri(file_path):
-     with open(file_path, "rb") as img_file:
-         base64_data = base64.b64encode(img_file.read()).decode('utf-8')
-         return f"data:image/png;base64,{base64_data}"
-
- @spaces.GPU(queue=False)
+ @spaces.GPU()
def stream_chat(message, history: list, temperature: float, max_new_tokens: int):
    print(f'message is - {message}')
    print(f'history is - {history}')
-     messages = []
-
+     conversation = []
    if message["files"]:
-         image = message["files"][-1]
-         messages.append({
-             "role": "user",
-             "content": [
-                 {"type": "text", "text": message['text']},
-                 {"type": "image_url", "image_url":{"url": image}}
-             ]
-         })
+         image = Image.open(message["files"][-1]).convert('RGB')
+         conversation.append({"role": "user", "content": message['text']})
    else:
        if len(history) == 0:
            raise gr.Error("Please upload an image first.")
            image = None
        else:
-             image = history[0][0][0]
+             image = Image.open(history[0][0][0])
        for prompt, answer in history:
            if answer is None:
-                 messages.extend([{
-                     "role": "user",
-                     "content": [
-                         {"type": "text", "text": prompt},
-                         {"type": "image_url", "image_url": {"url": image}}
-                     ]
-                 },{
-                     "role": "assistant",
-                     "content": ""
-                 }])
+                 conversation.extend([{"role": "user", "content": prompt},{"role": "assistant", "content": ""}])
            else:
-                 messages.extend([{
-                     "role": "user",
-                     "content": [
-                         {"type": "text", "text": prompt},
-                         {"type": "image_url", "image_url": {"url": image}}
-                     ]
-                 }, {
-                     "role": "assistant",
-                     "content": answer
-                 }])
-         messages.append({"role": "user", "content": message['text']})
-     print(f"Messages is -\n{messages}")
-
-
-     response = llm.create_chat_completion(
-         messages = messages,
+                 conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+         conversation.append({"role": "user", "content": message['text']})
+     print(f"Conversation is -\n{conversation}")
+
+     generate_kwargs = dict(
+         image=image,
+         msgs=conversation,
+         max_new_tokens=max_new_tokens,
        temperature=temperature,
-         max_tokens=max_new_tokens,
+         sampling=True,
+         tokenizer=tokenizer,
    )
+     if temperature == 0:
+         generate_kwargs["sampling"] = False

-     return response["choices"][0]["text"]
+     response = model.chat(**generate_kwargs)
+     return response


chatbot = gr.Chatbot(height=450)
chat_input = gr.MultimodalTextbox(
-     interactive=True,
-     file_types=["image"],
-     placeholder="Enter message or upload file...",
+     interactive=True,
+     file_types=["image"],
+     placeholder="Enter message or upload file...",
    show_label=False,

)
EXAMPLES = [
-     [{"text": "What is on the desk?", "files": ["./laptop.jpg"]}],
-     [{"text": "Where it is?", "files": ["./hotel.jpg"]}],
-     [{"text": "Can yo describe this image?", "files": ["./spacecat.png"]}]
+     [{"text": "Describe it in great detailed.", "files": ["./laptop.jpg"]}],
+     [{"text": "Describe it in great detailed.", "files": ["./hotel.jpg"]}],
+     [{"text": "Describe it in great detailed.", "files": ["./spacecat.png"]}]
]

with gr.Blocks(css=CSS) as demo:
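
In short, the commit drops the llama-cpp-python GGUF path (Llava15ChatHandler plus llm.create_chat_completion) and instead loads the checkpoint through transformers with trust_remote_code, calling the repo's model.chat() method. Below is a minimal sketch of the new inference path outside Gradio; it assumes MODEL_ID matches MODEL_LIST[0] ("openbmb/MiniCPM-Llama3-V-2_5"), a CUDA device at index 0, and a local example image, and the keyword arguments simply mirror the generate_kwargs built in app.py.

# Sketch only: MODEL_ID, the image path, and the prompt are illustrative values
# taken from MODEL_LIST and EXAMPLES in the diff above, not part of the commit itself.
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

MODEL_ID = "openbmb/MiniCPM-Llama3-V-2_5"  # assumed to match MODEL_LIST[0] in app.py

# Same loading pattern as the new app.py: fp16 weights, remote code, GPU 0.
model = AutoModel.from_pretrained(MODEL_ID, torch_dtype=torch.float16, trust_remote_code=True).to(0)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model.eval()

image = Image.open("./laptop.jpg").convert("RGB")  # any RGB image works here
msgs = [{"role": "user", "content": "What is on the desk?"}]

# model.chat() is the remote-code entry point that replaces llm.create_chat_completion();
# app.py sets sampling=False (greedy decoding) when temperature == 0.
response = model.chat(
    image=image,
    msgs=msgs,
    tokenizer=tokenizer,
    sampling=True,
    temperature=0.7,
    max_new_tokens=1024,
)
print(response)

Note that Thread and TextIteratorStreamer are imported by the new app.py but not used yet: in this revision stream_chat still returns the full response string rather than streaming tokens.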