mishig (HF staff) committed
Commit 51a1671
1 parent: f784d2f

make tokens count work for non-streaming as well

src/lib/components/InferencePlayground/InferencePlayground.svelte CHANGED
@@ -37,7 +37,7 @@
 	let showTokenModal = false;
 	let loading = false;
 	let latency = 0;
-	let tokensCount = 0;
+	let generatedTokensCount = 0;
 	let abortController: AbortController | undefined = undefined;
 	let waitForNonStreaming = true;
 
@@ -96,17 +96,21 @@
 				if (streamingMessage) {
 					streamingMessage.content = content;
 					conversation.messages = [...conversation.messages];
-					tokensCount += 1;
+					generatedTokensCount += 1;
 				}
 			},
 			abortController
 		);
 	} else {
 		waitForNonStreaming = true;
-		const newMessage = await handleNonStreamingResponse(hf, conversation);
+		const { message: newMessage, completion_tokens: newTokensCount } = await handleNonStreamingResponse(
+			hf,
+			conversation
+		);
 		// check if the user did not abort the request
 		if (waitForNonStreaming) {
 			conversation.messages = [...conversation.messages, newMessage];
+			generatedTokensCount += newTokensCount;
 		}
 	}
 
@@ -206,7 +210,7 @@
 			<IconDelete />
 		</button>
 		<div class="flex-1 items-center justify-center text-center text-sm text-gray-500">
-			<span class="max-xl:hidden">{tokensCount} tokens · Latency {latency}ms</span>
+			<span class="max-xl:hidden">{generatedTokensCount} tokens · Latency {latency}ms</span>
 		</div>
 		<button
 			type="button"
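
Taken together, the component now counts tokens on both paths: the streaming callback increments generatedTokensCount once per streamed chunk (an approximation, since a chunk is not guaranteed to be exactly one token), while the non-streaming path adds the completion_tokens figure reported by the server. A minimal standalone TypeScript sketch of that logic, where chunks and usage are hypothetical stand-ins for the real inference responses:

// Sketch only: `chunks` and `usage` stand in for the real streaming and
// non-streaming inference responses handled by the Svelte component.
let generatedTokensCount = 0;

async function countStreamed(chunks: AsyncIterable<string>): Promise<void> {
	for await (const _chunk of chunks) {
		generatedTokensCount += 1; // one increment per streamed chunk
	}
}

function countNonStreamed(usage: { completion_tokens: number }): void {
	generatedTokensCount += usage.completion_tokens; // server-reported count
}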
src/lib/components/InferencePlayground/inferencePlaygroundUtils.ts CHANGED
@@ -38,7 +38,7 @@ export async function handleStreamingResponse(
 export async function handleNonStreamingResponse(
 	hf: HfInference,
 	conversation: Conversation
-): Promise<ChatCompletionInputMessage> {
+): Promise<{ message: ChatCompletionInputMessage; completion_tokens: number }> {
 	const { model, systemMessage } = conversation;
 	const messages = [
 		...(isSystemPromptSupported(model) && systemMessage.content?.length ? [systemMessage] : []),
@@ -53,7 +53,9 @@ export async function handleNonStreamingResponse(
 	});
 
 	if (response.choices && response.choices.length > 0) {
-		return response.choices[0].message;
+		const { message } = response.choices[0];
+		const { completion_tokens } = response.usage;
+		return { message, completion_tokens };
 	}
 	throw new Error("No response from the model");
 }
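
For illustration, a hedged sketch of a call site for the new return shape; hf, conversation, and generatedTokensCount are assumed to already exist, as in the component above:

// Hypothetical call site: the helper now resolves to an object carrying both
// the assistant message and the server-reported completion token count.
const { message, completion_tokens } = await handleNonStreamingResponse(hf, conversation);
conversation.messages = [...conversation.messages, message];
generatedTokensCount += completion_tokens;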