Spaces:
Running
on
L4
Running
on
L4
Update app.py
Browse files
app.py
CHANGED
@@ -83,7 +83,8 @@ def get_generation_speed():
|
|
83 |
return generation_speed
|
84 |
|
85 |
@observe()
|
86 |
-
def log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample,
|
|
|
87 |
return "".join(model_outputs)
|
88 |
|
89 |
|
@@ -144,9 +145,8 @@ def generate(
|
|
144 |
|
145 |
generation_speed = token_per_second_calculator(sum_tokens, time_delta)
|
146 |
|
147 |
-
log_function = log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample,
|
148 |
|
149 |
-
print(f"generation_speed: {generation_speed}")
|
150 |
|
151 |
|
152 |
|
|
|
83 |
return generation_speed
|
84 |
|
85 |
@observe()
|
86 |
+
def log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample, generation_speed, model_outputs):
|
87 |
+
print(f"generation_speed: {generation_speed}")
|
88 |
return "".join(model_outputs)
|
89 |
|
90 |
|
|
|
145 |
|
146 |
generation_speed = token_per_second_calculator(sum_tokens, time_delta)
|
147 |
|
148 |
+
log_function = log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample, generation_speed, outputs)
|
149 |
|
|
|
150 |
|
151 |
|
152 |
|