Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
import spaces
|
2 |
import json
|
3 |
import subprocess
|
4 |
from llama_cpp import Llama
|
@@ -22,7 +21,6 @@ hf_hub_download(
|
|
22 |
def get_messages_formatter_type(model_name):
|
23 |
return MessagesFormatterType.LLAMA_3
|
24 |
|
25 |
-
@spaces.GPU
|
26 |
def respond(
|
27 |
message,
|
28 |
history: list[tuple[str, str]],
|
@@ -42,10 +40,9 @@ def respond(
|
|
42 |
if llm is None or llm_model != model:
|
43 |
llm = Llama(
|
44 |
model_path=f"models/{model}",
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
n_ctx=8192,
|
49 |
)
|
50 |
llm_model = model
|
51 |
|
@@ -110,8 +107,8 @@ demo = gr.ChatInterface(
|
|
110 |
value="llama-3.2-1b-instruct-q4_k_m.gguf",
|
111 |
label="Model"
|
112 |
),
|
113 |
-
gr.Textbox(value="You are a world-class AI system
|
114 |
-
gr.Slider(minimum=1, maximum=
|
115 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
116 |
gr.Slider(
|
117 |
minimum=0.1,
|
@@ -148,17 +145,16 @@ demo = gr.ChatInterface(
|
|
148 |
color_accent_soft_dark="transparent",
|
149 |
code_background_fill_dark="#292733",
|
150 |
),
|
151 |
-
retry_btn="Retry",
|
152 |
-
undo_btn="Undo",
|
153 |
-
clear_btn="Clear",
|
154 |
-
submit_btn="Send",
|
155 |
title="Meta Llama 3.2 (1B)",
|
156 |
description=description,
|
157 |
chatbot=gr.Chatbot(
|
158 |
scale=1,
|
159 |
-
likeable=
|
160 |
show_copy_button=True
|
161 |
-
)
|
|
|
|
|
|
|
162 |
)
|
163 |
|
164 |
if __name__ == "__main__":
|
|
|
|
|
1 |
import json
|
2 |
import subprocess
|
3 |
from llama_cpp import Llama
|
|
|
21 |
def get_messages_formatter_type(model_name):
|
22 |
return MessagesFormatterType.LLAMA_3
|
23 |
|
|
|
24 |
def respond(
|
25 |
message,
|
26 |
history: list[tuple[str, str]],
|
|
|
40 |
if llm is None or llm_model != model:
|
41 |
llm = Llama(
|
42 |
model_path=f"models/{model}",
|
43 |
+
n_gpu_layers=0, # Set to 0 for CPU
|
44 |
+
n_batch=512, # Reduced batch size for CPU
|
45 |
+
n_ctx=2048, # Reduced context size for CPU
|
|
|
46 |
)
|
47 |
llm_model = model
|
48 |
|
|
|
107 |
value="llama-3.2-1b-instruct-q4_k_m.gguf",
|
108 |
label="Model"
|
109 |
),
|
110 |
+
gr.Textbox(value="You are a world-class AI system named Meta Llama 3.2 (1B). You are capable of complex reasoning, reflecting on your thoughts, and providing detailed and accurate responses. You are designed to excel in conversational dialogue, agentic retrieval, and summarization tasks. You can understand and generate text in multiple languages. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags.", label="System message"),
|
111 |
+
gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max tokens"),
|
112 |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
113 |
gr.Slider(
|
114 |
minimum=0.1,
|
|
|
145 |
color_accent_soft_dark="transparent",
|
146 |
code_background_fill_dark="#292733",
|
147 |
),
|
|
|
|
|
|
|
|
|
148 |
title="Meta Llama 3.2 (1B)",
|
149 |
description=description,
|
150 |
chatbot=gr.Chatbot(
|
151 |
scale=1,
|
152 |
+
likeable=True,
|
153 |
show_copy_button=True
|
154 |
+
),
|
155 |
+
cache_examples=False,
|
156 |
+
autofocus=False,
|
157 |
+
concurrency_limit=10
|
158 |
)
|
159 |
|
160 |
if __name__ == "__main__":
|