Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -113,7 +113,7 @@ def run_inference(message, history, model_picked):
|
|
113 |
print(prompt)
|
114 |
|
115 |
# Generating Response
|
116 |
-
|
117 |
prompt = prompt,
|
118 |
max_new_tokens = 1024,
|
119 |
temperature = 0.15,
|
@@ -122,15 +122,11 @@ def run_inference(message, history, model_picked):
|
|
122 |
decode_special_tokens = True,
|
123 |
stop_conditions = [tokenizer.eos_token_id],
|
124 |
gen_settings = ExLlamaV2Sampler.Settings.greedy(),
|
125 |
-
embeddings = images_embeddings
|
126 |
-
|
127 |
-
)
|
128 |
-
|
129 |
-
|
130 |
-
else:
|
131 |
-
result = out
|
132 |
-
print(result)
|
133 |
-
yield result
|
134 |
|
135 |
description="""
|
136 |
A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
|
|
|
113 |
print(prompt)
|
114 |
|
115 |
# Generating Response
|
116 |
+
output = generator.generate(
|
117 |
prompt = prompt,
|
118 |
max_new_tokens = 1024,
|
119 |
temperature = 0.15,
|
|
|
122 |
decode_special_tokens = True,
|
123 |
stop_conditions = [tokenizer.eos_token_id],
|
124 |
gen_settings = ExLlamaV2Sampler.Settings.greedy(),
|
125 |
+
embeddings = images_embeddings
|
126 |
+
)
|
127 |
+
result = out.split("[/INST]")[-1]
|
128 |
+
print(result)
|
129 |
+
return result
|
|
|
|
|
|
|
|
|
130 |
|
131 |
description="""
|
132 |
A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
|