Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -113,7 +113,7 @@ def run_inference(message, history, model_picked):
|
|
113 |
print(prompt)
|
114 |
|
115 |
# Generating Response
|
116 |
-
|
117 |
prompt = prompt,
|
118 |
max_new_tokens = 1024,
|
119 |
temperature = 0.15,
|
@@ -122,15 +122,11 @@ def run_inference(message, history, model_picked):
|
|
122 |
decode_special_tokens = True,
|
123 |
stop_conditions = [tokenizer.eos_token_id],
|
124 |
gen_settings = ExLlamaV2Sampler.Settings.greedy(),
|
125 |
-
embeddings = images_embeddings
|
126 |
-
|
127 |
-
)
|
128 |
-
|
129 |
-
|
130 |
-
else:
|
131 |
-
result = out
|
132 |
-
print(result)
|
133 |
-
yield result
|
134 |
|
135 |
description="""
|
136 |
A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
|
|
|
113 |
print(prompt)
|
114 |
|
115 |
# Generating Response
|
116 |
+
output = generator.generate(
|
117 |
prompt = prompt,
|
118 |
max_new_tokens = 1024,
|
119 |
temperature = 0.15,
|
|
|
122 |
decode_special_tokens = True,
|
123 |
stop_conditions = [tokenizer.eos_token_id],
|
124 |
gen_settings = ExLlamaV2Sampler.Settings.greedy(),
|
125 |
+
embeddings = images_embeddings
|
126 |
+
)
|
127 |
+
result = out.split("[/INST]")[-1]
|
128 |
+
print(result)
|
129 |
+
return result
|
|
|
|
|
|
|
|
|
130 |
|
131 |
description="""
|
132 |
A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
|