Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -45,9 +45,9 @@ model = Llama(
|
|
45 |
n_parts=1,
|
46 |
#n_batch=100,
|
47 |
logits_all=True,
|
48 |
-
|
49 |
verbose=True,
|
50 |
-
|
51 |
n_gqa=8 #must be set for 70b models
|
52 |
)
|
53 |
|
@@ -129,12 +129,12 @@ def generate_search_request():
|
|
129 |
parameters = data.get("parameters", {})
|
130 |
|
131 |
# Extract parameters from the request
|
132 |
-temperature = 0.01
|
133 |
truncate = parameters.get("truncate", 1000)
|
134 |
max_new_tokens = parameters.get("max_new_tokens", 1024)
|
135 |
-top_p = 0.85
|
136 |
repetition_penalty = parameters.get("repetition_penalty", 1.2)
|
137 |
-top_k = 30
|
138 |
return_full_text = parameters.get("return_full_text", False)
|
139 |
|
140 |
|
@@ -168,12 +168,12 @@ def generate_response():
|
|
168 |
parameters = data.get("parameters", {})
|
169 |
|
170 |
# Extract parameters from the request
|
171 |
-temperature = 0.01
|
172 |
truncate = parameters.get("truncate", 1000)
|
173 |
max_new_tokens = parameters.get("max_new_tokens", 1024)
|
174 |
-top_p = 0.85
|
175 |
repetition_penalty = parameters.get("repetition_penalty", 1.2)
|
176 |
-top_k = 30
|
177 |
return_full_text = parameters.get("return_full_text", False)
|
178 |
|
179 |
|
|
|
45 |
n_parts=1,
|
46 |
#n_batch=100,
|
47 |
logits_all=True,
|
48 |
+
n_threads=12,
|
49 |
verbose=True,
|
50 |
+
n_gpu_layers=35,
|
51 |
n_gqa=8 #must be set for 70b models
|
52 |
)
|
53 |
|
|
|
129 |
parameters = data.get("parameters", {})
|
130 |
|
131 |
# Extract parameters from the request
|
132 |
+
temperature = parameters.get("temperature", 0.01)
|
133 |
truncate = parameters.get("truncate", 1000)
|
134 |
max_new_tokens = parameters.get("max_new_tokens", 1024)
|
135 |
+
top_p = parameters.get("top_p", 0.85)
|
136 |
repetition_penalty = parameters.get("repetition_penalty", 1.2)
|
137 |
+
top_k = parameters.get("top_k", 30)
|
138 |
return_full_text = parameters.get("return_full_text", False)
|
139 |
|
140 |
|
|
|
168 |
parameters = data.get("parameters", {})
|
169 |
|
170 |
# Extract parameters from the request
|
171 |
+
temperature = parameters.get("temperature", 0.01)
|
172 |
truncate = parameters.get("truncate", 1000)
|
173 |
max_new_tokens = parameters.get("max_new_tokens", 1024)
|
174 |
+
top_p = parameters.get("top_p", 0.85)
|
175 |
repetition_penalty = parameters.get("repetition_penalty", 1.2)
|
176 |
+
top_k = parameters.get("top_k", 30)
|
177 |
return_full_text = parameters.get("return_full_text", False)
|
178 |
|
179 |
|