Nekochu committed
Commit da78e50
1 Parent(s): ca1953e

Update app.py

Files changed (1)
app.py +10 -6
app.py CHANGED
@@ -21,11 +21,15 @@ LICENSE = """
 ---.
 """
 
+models_cache = {}
+
 def load_model(model_id):
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
-    return model, tokenizer
+    if model_id not in models_cache:
+        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.use_default_system_prompt = False
+        models_cache[model_id] = (model, tokenizer)
+    return models_cache[model_id]
 
 if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
@@ -86,7 +90,7 @@ chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         gr.Textbox(label="System prompt", lines=6),
-        gr.Textbox(label="Model ID", placeholder="Nekochu/Luminia-13B-v3"),
+        gr.Textbox(label="Model ID", default="Nekochu/Luminia-13B-v3"),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
@@ -137,4 +141,4 @@ with gr.Blocks(css="style.css") as demo:
     gr.Markdown(LICENSE)
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    demo.queue(max_size=20).launch()
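The new models_cache dict lets repeat requests for the same Model ID skip a full reload, but nothing is ever evicted: every distinct model a user enters stays resident, and even in 4-bit a 13B checkpoint holds several GB of VRAM. Below is a minimal sketch of the same pattern with a FIFO cap, assuming a recent transformers release where 4-bit loading is requested through BitsAndBytesConfig instead of the older load_in_4bit kwarg; the MAX_CACHED_MODELS cap and the eviction logic are assumptions added for illustration, not part of this commit.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

models_cache = {}      # model_id -> (model, tokenizer), as in the commit
MAX_CACHED_MODELS = 2  # assumed cap, not in the commit; tune to available VRAM

def load_model(model_id: str):
    if model_id not in models_cache:
        if len(models_cache) >= MAX_CACHED_MODELS:
            # Evict the oldest entry (dicts preserve insertion order) and
            # release its GPU memory before loading another model.
            old_model, _ = models_cache.pop(next(iter(models_cache)))
            del old_model
            torch.cuda.empty_cache()
        quant_config = BitsAndBytesConfig(load_in_4bit=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_id, device_map="auto", quantization_config=quant_config
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.use_default_system_prompt = False
        models_cache[model_id] = (model, tokenizer)
    return models_cache[model_id]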
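One caveat on the Textbox change: default= was the keyword of Gradio 2.x input components. In Gradio 3 and later (gr.ChatInterface only exists in newer releases), the parameter for initial content is value=, and the replaced placeholder= only renders hint text while submitting an empty string. If this Space runs a current Gradio, default= may be ignored or rejected depending on the version, and the equivalent line would be:

gr.Textbox(label="Model ID", value="Nekochu/Luminia-13B-v3"),

That keeps the field pre-filled so generate() receives a usable model_id even when the user never edits it.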