Update agent.py
agent.py CHANGED
@@ -120,7 +120,8 @@ def initialize_models(use_api_mode=False):
     print("Initializing models in non-API mode with local models...")

     try:
+        from typing import Optional, List, Any
+        from pydantic import Field, PrivateAttr
         from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
         from llama_index.core.llms.callbacks import llm_completion_callback
         from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
@@ -128,16 +129,18 @@ def initialize_models(use_api_mode=False):
         import torch

         class QwenVL7BCustomLLM(CustomLLM):
+            model_name: str = Field(default="Qwen/Qwen2.5-VL-7B-Instruct")
+            context_window: int = Field(default=32768)
+            num_output: int = Field(default=256)
+            _model = PrivateAttr()
+            _processor = PrivateAttr()
+
+            def __init__(self, **kwargs):
+                super().__init__(**kwargs)
+                self._model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
                     self.model_name, torch_dtype=torch.bfloat16, device_map="auto"
                 )
+                self._processor = AutoProcessor.from_pretrained(self.model_name)

             @property
             def metadata(self) -> LLMMetadata:
@@ -148,41 +151,50 @@ def initialize_models(use_api_mode=False):
                 )

             @llm_completion_callback()
+            def complete(
+                self,
+                prompt: str,
+                image_paths: Optional[List[str]] = None,
+                **kwargs: Any
+            ) -> CompletionResponse:
+                # Prepare multimodal input
                 messages = [{"role": "user", "content": []}]
                 if image_paths:
                     for path in image_paths:
                         messages[0]["content"].append({"type": "image", "image": path})
                 messages[0]["content"].append({"type": "text", "text": prompt})

+                # Tokenize and process
+                text = self._processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
                 image_inputs, video_inputs = process_vision_info(messages)
+                inputs = self._processor(
                     text=[text],
                     images=image_inputs,
                     videos=video_inputs,
                     padding=True,
                     return_tensors="pt",
                 )
+                inputs = inputs.to(self._model.device)

                 # Generate output
+                generated_ids = self._model.generate(**inputs, max_new_tokens=self.num_output)
                 generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
+                output_text = self._processor.batch_decode(
                     generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
                 )[0]
                 return CompletionResponse(text=output_text)

             @llm_completion_callback()
+            def stream_complete(
+                self,
+                prompt: str,
+                image_paths: Optional[List[str]] = None,
+                **kwargs: Any
+            ) -> CompletionResponseGen:
                 response = self.complete(prompt, image_paths)
                 for token in response.text:
                     yield CompletionResponse(text=token, delta=token)

         proj_llm = QwenVL7BCustomLLM()

         # Code LLM
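For orientation, here is a minimal usage sketch of the class this commit adds. It is not part of the commit: it assumes QwenVL7BCustomLLM has been lifted to module scope (in agent.py it is defined inside initialize_models), that process_vision_info is imported where the class lives (the hunk calls it on an unchanged line, presumably from qwen_vl_utils), and that example.jpg is a placeholder for a real local image.

    # Sketch only: assumes QwenVL7BCustomLLM and its process_vision_info dependency
    # are importable, and that enough GPU memory is available for Qwen2.5-VL-7B-Instruct.
    llm = QwenVL7BCustomLLM()  # loads weights with torch.bfloat16 and device_map="auto"

    # Multimodal completion: one prompt plus one local image (placeholder path).
    response = llm.complete(
        "Describe what is shown in this image.",
        image_paths=["example.jpg"],
    )
    print(response.text)

    # Streaming: stream_complete() runs complete() to the end, then yields the finished
    # text one character at a time as CompletionResponse deltas.
    for chunk in llm.stream_complete("Describe the image again.", image_paths=["example.jpg"]):
        print(chunk.delta, end="")

The Field/PrivateAttr split follows the LlamaIndex custom-LLM convention: CustomLLM is a pydantic model, so plain settings such as model_name become validated fields, while the heavyweight Hugging Face model and processor sit in private attributes that pydantic does not try to validate or serialize.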
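A design note on the added stream_complete: it replays an already-finished completion character by character, so callers get streaming semantics but no earlier first token. If true incremental streaming were ever wanted, a common transformers pattern (not part of this commit; it assumes the Qwen processor exposes its underlying tokenizer as processor.tokenizer) is TextIteratorStreamer:

    from threading import Thread

    from transformers import TextIteratorStreamer

    def stream_qwen(model, processor, inputs, max_new_tokens=256):
        # generate() runs in a background thread and pushes decoded text into the
        # streamer; the caller consumes chunks as they are produced.
        streamer = TextIteratorStreamer(
            processor.tokenizer, skip_prompt=True, skip_special_tokens=True
        )
        generation_kwargs = dict(**inputs, max_new_tokens=max_new_tokens, streamer=streamer)
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()
        for text_chunk in streamer:
            yield text_chunk
        thread.join()

Inside stream_complete, each yielded chunk would then be wrapped in a CompletionResponse whose delta is that chunk.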