pandora-s committed
Commit 305e0ae
Parent: c99b37c

Update app.py

Files changed (1): app.py (+6, -10)
app.py CHANGED
@@ -113,7 +113,7 @@ def run_inference(message, history, model_picked):
     print(prompt)
 
     # Generating Response
-    for out in generator.generate(
+    output = generator.generate(
         prompt = prompt,
         max_new_tokens = 1024,
         temperature = 0.15,
@@ -122,15 +122,11 @@ def run_inference(message, history, model_picked):
         decode_special_tokens = True,
         stop_conditions = [tokenizer.eos_token_id],
         gen_settings = ExLlamaV2Sampler.Settings.greedy(),
-        embeddings = images_embeddings,
-        stream = True
-    ):
-        if "[/INST]" in out:
-            result = out.split("[/INST]")[-1]
-        else:
-            result = out
-        print(result)
-        yield result
+        embeddings = images_embeddings
+    )
+    result = output.split("[/INST]")[-1]
+    print(result)
+    return result
 
     description="""
     A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
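
In effect, the commit switches run_inference from a streaming generator (yielding partial text as tokens arrive) to a plain function that makes one blocking generate call and returns the finished string. A minimal, hypothetical illustration of that calling-convention change follows; the function names and strings below are illustrative only and are not taken from app.py.

# Illustrative sketch only: these names are not from app.py.

def respond_streaming(message):
    # Old behaviour: a generator that yields progressively longer partial replies.
    partial = ""
    for chunk in ("Hel", "lo", "!"):
        partial += chunk
        yield partial

def respond_blocking(message):
    # New behaviour: one blocking call, one final string returned.
    return "Hello!"

print(list(respond_streaming("hi")))  # ['Hel', 'Hello', 'Hello!']
print(respond_blocking("hi"))         # Hello!

With the generator form, a caller can display text as it arrives; with the plain return, only the completed reply is available to the caller.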