Coool2 committed
Commit 0419c04 · 1 Parent(s): 6869498

Update agent.py

Files changed (1)
  1. agent.py +18 -44
agent.py CHANGED
@@ -116,49 +116,23 @@ def initialize_models(use_api_mode=False):
     else:
         # Non-API Mode - Using HuggingFace models
         print("Initializing models in non-API mode with local models...")
-        try:
-            # Try to use Pixtral 12B with vLLM if available
-            pixtral_model = "Qwen/Qwen3-8B"  # Fallback model
-            try:
-                pixtral_model = "mistralai/Pixtral-12B-2409"
-                print(f"Using Pixtral 12B with vLLM")
-
-                # Custom prompt template for Pixtral model
-                def messages_to_prompt(messages):
-                    prompt = "\n".join([str(x) for x in messages])
-                    return f"<s>[INST] {prompt} [/INST] </s>\n"
-
-                def completion_to_prompt(completion):
-                    return f"<s>[INST] {completion} [/INST] </s>\n"
-
-                proj_llm = Vllm(
-                    model=pixtral_model,
-                    tensor_parallel_size=1,  # Adjust based on available GPUs
-                    max_new_tokens=16000,
-                    messages_to_prompt=messages_to_prompt,
-                    completion_to_prompt=completion_to_prompt,
-                    temperature=0.6,
-                    top_p=0.95,
-                    top_k=20
-                )
-            except (ImportError, Exception) as e:
-                print(f"Error loading Pixtral with vLLM: {e}")
-                print(f"Falling back to {pixtral_model} with HuggingFace...")
-
-                # Fallback to regular HuggingFace LLM
-                proj_llm = HuggingFaceLLM(
-                    model_name=pixtral_model,
-                    tokenizer_name=pixtral_model,
-                    device_map="auto",
-                    max_new_tokens=16000,
-                    model_kwargs={"torch_dtype": "auto"},
-                    generate_kwargs={
-                        "temperature": 0.6,
-                        "top_p": 0.95,
-                        "top_k": 20
-                    }
-                )
 
+        try:
+            pixtral_model = "Prarabdha/pixtral-12b-240910-hf"
+            # Fallback to regular HuggingFace LLM
+            proj_llm = HuggingFaceLLM(
+                model_name=pixtral_model,
+                tokenizer_name=pixtral_model,
+                device_map="auto",
+                max_new_tokens=16000,
+                model_kwargs={"torch_dtype": "auto"},
+                generate_kwargs={
+                    "temperature": 0.6,
+                    "top_p": 0.95,
+                    "top_k": 20
+                }
+            )
+
         # Code LLM
         code_llm = HuggingFaceLLM(
             model_name="Qwen/Qwen2.5-Coder-3B-Instruct",
@@ -167,7 +141,7 @@ def initialize_models(use_api_mode=False):
             model_kwargs={"torch_dtype": "auto"},
             generate_kwargs={"do_sample": False}
         )
-
+
         # Embedding model
         embed_model = HuggingFaceEmbedding(
             model_name="llamaindex/vdr-2b-multi-v1",
@@ -178,7 +152,7 @@ def initialize_models(use_api_mode=False):
                 "low_cpu_mem_usage": True
             }
         )
-
+
         return proj_llm, code_llm, embed_model
     except Exception as e:
         print(f"Error initializing models: {e}")