KingNish committed
Commit 2242886 · verified · 1 Parent(s): c9fb560

Update app.py

Files changed (1): app.py (+10 -14)
app.py CHANGED
@@ -1,4 +1,3 @@
- import spaces
  import json
  import subprocess
  from llama_cpp import Llama
@@ -22,7 +21,6 @@ hf_hub_download(
  def get_messages_formatter_type(model_name):
      return MessagesFormatterType.LLAMA_3

- @spaces.GPU
  def respond(
      message,
      history: list[tuple[str, str]],
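For context, the spaces package is how Hugging Face ZeroGPU Spaces attach a GPU for the duration of a decorated call, so removing import spaces and @spaces.GPU goes hand in hand with the CPU-only llama.cpp settings in the next hunk. A minimal sketch of the pattern being removed (the function name generate is a placeholder, not from this repo):

import spaces

@spaces.GPU
def generate(prompt):
    # A GPU is attached for the duration of this call, then released.
    ...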
@@ -42,10 +40,9 @@ def respond(
      if llm is None or llm_model != model:
          llm = Llama(
              model_path=f"models/{model}",
-             flash_attn=True,
-             n_gpu_layers=81,
-             n_batch=1024,
-             n_ctx=8192,
+             n_gpu_layers=0,  # Set to 0 for CPU
+             n_batch=512,  # Reduced batch size for CPU
+             n_ctx=2048,  # Reduced context size for CPU
          )
          llm_model = model

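The net effect of this hunk is a pure-CPU llama.cpp configuration: flash attention is dropped, no layers are offloaded to a GPU, and both the batch size and the context window shrink to fit CPU memory and throughput. A minimal standalone sketch of the same configuration, assuming the GGUF file this Space downloads is already present under models/:

from llama_cpp import Llama

llm = Llama(
    model_path="models/llama-3.2-1b-instruct-q4_k_m.gguf",
    n_gpu_layers=0,  # keep every layer on the CPU
    n_batch=512,     # prompt-processing batch, halved from the GPU build's 1024
    n_ctx=2048,      # context window, down from the GPU build's 8192
)

result = llm("Q: What is the capital of France? A:", max_tokens=32)
print(result["choices"][0]["text"])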
@@ -110,8 +107,8 @@ demo = gr.ChatInterface(
          value="llama-3.2-1b-instruct-q4_k_m.gguf",
          label="Model"
      ),
-     gr.Textbox(value="You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags.", label="System message"),
-     gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max tokens"),
+     gr.Textbox(value="You are a world-class AI system named Meta Llama 3.2 (1B). You are capable of complex reasoning, reflecting on your thoughts, and providing detailed and accurate responses. You are designed to excel in conversational dialogue, agentic retrieval, and summarization tasks. You can understand and generate text in multiple languages. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags.", label="System message"),
+     gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
      gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
      gr.Slider(
          minimum=0.1,
@@ -148,17 +145,16 @@ demo = gr.ChatInterface(
          color_accent_soft_dark="transparent",
          code_background_fill_dark="#292733",
      ),
-     retry_btn="Retry",
-     undo_btn="Undo",
-     clear_btn="Clear",
-     submit_btn="Send",
      title="Meta Llama 3.2 (1B)",
      description=description,
      chatbot=gr.Chatbot(
          scale=1,
-         likeable=False,
+         likeable=True,
          show_copy_button=True
-     )
+     ),
+     cache_examples=False,
+     autofocus=False,
+     concurrency_limit=10
  )

  if __name__ == "__main__":
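Taken together, the UI hunks drop the retry_btn/undo_btn/clear_btn/submit_btn keyword arguments (deprecated in newer Gradio releases), enable likes on the chatbot, and set example caching, autofocus, and a concurrency cap on the interface itself. An abbreviated sketch of the resulting wiring, assuming Gradio 4.x; the model selector, top-p slider, theme, and description from the full file are omitted here:

import gradio as gr

demo = gr.ChatInterface(
    respond,  # the chat handler defined earlier in app.py
    additional_inputs=[
        gr.Textbox(value="You are a world-class AI system named Meta Llama 3.2 (1B). ...",
                   label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    ],
    title="Meta Llama 3.2 (1B)",
    chatbot=gr.Chatbot(scale=1, likeable=True, show_copy_button=True),
    cache_examples=False,   # do not pre-run and cache example prompts
    autofocus=False,        # do not focus the input textbox on page load
    concurrency_limit=10,   # cap simultaneous generation requests at 10
)

if __name__ == "__main__":
    demo.launch()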
 
 