make tokens count working for non-streaming as well
src/lib/components/InferencePlayground/InferencePlayground.svelte
CHANGED
@@ -37,7 +37,7 @@
   let showTokenModal = false;
   let loading = false;
   let latency = 0;
-  let
+  let generatedTokensCount = 0;
   let abortController: AbortController | undefined = undefined;
   let waitForNonStreaming = true;

@@ -96,17 +96,21 @@
         if (streamingMessage) {
           streamingMessage.content = content;
           conversation.messages = [...conversation.messages];
-
+          generatedTokensCount += 1;
         }
       },
       abortController
     );
   } else {
     waitForNonStreaming = true;
-    const newMessage = await handleNonStreamingResponse(
+    const { message: newMessage, completion_tokens: newTokensCount } = await handleNonStreamingResponse(
+      hf,
+      conversation
+    );
     // check if the user did not abort the request
     if (waitForNonStreaming) {
       conversation.messages = [...conversation.messages, newMessage];
+      generatedTokensCount += newTokensCount;
     }
   }

@@ -206,7 +210,7 @@
       <IconDelete />
     </button>
     <div class="flex-1 items-center justify-center text-center text-sm text-gray-500">
-      <span class="max-xl:hidden">{
+      <span class="max-xl:hidden">{generatedTokensCount} tokens · Latency {latency}ms</span>
     </div>
     <button
       type="button"
src/lib/components/InferencePlayground/inferencePlaygroundUtils.ts
CHANGED
@@ -38,7 +38,7 @@ export async function handleStreamingResponse(
 export async function handleNonStreamingResponse(
   hf: HfInference,
   conversation: Conversation
-): Promise<ChatCompletionInputMessage> {
+): Promise<{ message: ChatCompletionInputMessage; completion_tokens: number }> {
   const { model, systemMessage } = conversation;
   const messages = [
     ...(isSystemPromptSupported(model) && systemMessage.content?.length ? [systemMessage] : []),

@@ -53,7 +53,9 @@ export async function handleNonStreamingResponse(
   });

   if (response.choices && response.choices.length > 0) {
-
+    const { message } = response.choices[0];
+    const { completion_tokens } = response.usage;
+    return { message, completion_tokens };
   }
   throw new Error("No response from the model");
 }
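The `usage` field read here is the standard OpenAI-compatible usage block returned with chat completions. As a sketch (not the package's exact type declaration), its shape is:

// Shape of response.usage as used above (OpenAI-compatible usage block).
interface ChatCompletionUsage {
  prompt_tokens: number;     // tokens in the request messages
  completion_tokens: number; // tokens generated by the model (what gets counted)
  total_tokens: number;      // prompt_tokens + completion_tokens
}

Reading `completion_tokens` rather than `total_tokens` keeps the displayed count limited to what the model actually generated, matching what the per-chunk increment approximates in the streaming branch.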