abrakjamson committed
Commit 434becc · Parent: bd9fdbb

Reducing default max new tokens to speed up responses on CPU

Files changed (1)
  1. app.py +2 -2
app.py CHANGED

@@ -568,7 +568,7 @@ with gr.Blocks(
     else:
         gr.Markdown("""# 🧠 LLM Mind Control ((Llama 3.2 1B))
 
-        *Warning: although using a small model, running on CPU will still be very slow*""")
+        *Warning: although using a small model, running on CPU will still be very slow (30+ seconds to first token)*""")
     gr.Markdown("""Unlike prompting, direct weight manipulation lets you fine-tune the amount of a personality
     trait or topic. Enabled through [Representation Engineering](https://arxiv.org/abs/2310.01405)
     via the [repeng](https://pypi.org/project/repeng) library.
@@ -670,7 +670,7 @@ with gr.Blocks(
     </div>
     """)
     max_new_tokens = gr.Number(
-        value=192,
+        value=128,
         precision=0,
         step=10,
         show_label=False
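For context on the change: `max_new_tokens` caps how many tokens the model may generate per response, and on CPU each new token costs a full forward pass, so decoding time grows roughly linearly with that cap. Lowering the default from 192 to 128 therefore trims worst-case response latency by about a third. Below is a minimal sketch of how a `gr.Number` value like this typically feeds a Hugging Face `generate` call; the model ID, `generate_response` function, and wiring are illustrative assumptions, not the actual app.py code:

```python
# Illustrative sketch (not the actual app.py wiring): how a Gradio Number
# value typically bounds generation length in a transformers pipeline.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

def generate_response(prompt: str, max_new_tokens: int) -> str:
    """Generate a reply capped at max_new_tokens newly generated tokens."""
    inputs = tokenizer(prompt, return_tensors="pt")
    # On CPU, generation time scales with max_new_tokens, which is why
    # reducing the default from 192 to 128 speeds up responses.
    outputs = model.generate(**inputs, max_new_tokens=int(max_new_tokens))
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
```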