Spaces: Running on Zero

Commit: Hide API and fix temp=0 case
Browse files

app.py CHANGED
@@ -49,12 +49,15 @@ def chat_inference(image, text, temperature, top_p, top_k, max_tokens, conversation):

     generation_kwargs = {
         "max_new_tokens": max_tokens,
-        "temperature": temperature,
         "top_p": top_p,
         "top_k": top_k,
         "do_sample": True,
     }

+    if temperature > 0:
+        generation_kwargs["temperature"] = temperature
+        generation_kwargs["do_sample"] = True
+
     output = model.generate(**inputs, **generation_kwargs)
     assistant_response = processor.decode(output[0], skip_special_tokens=True)

@@ -121,4 +124,4 @@ with gr.Blocks(title="Granite Vision 3.1 2B", css="h1 { overflow: hidden; }") as demo:
 )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(show_api=False)