"use_cache: false" for all requests
src/lib/components/InferencePlayground/inferencePlaygroundUtils.ts
CHANGED
```diff
@@ -26,7 +26,7 @@ export async function handleStreamingResponse(
 			temperature: conversation.config.temperature,
 			max_tokens: conversation.config.maxTokens,
 		},
-		{ signal: abortController.signal }
+		{ signal: abortController.signal, use_cache: false }
 	)) {
 		if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
 			out += chunk.choices[0].delta.content;
@@ -45,12 +45,15 @@ export async function handleNonStreamingResponse(
 		...conversation.messages,
 	];
 
-	const response = await hf.chatCompletion({
-		model: model.id,
-		messages,
-		temperature: conversation.config.temperature,
-		max_tokens: conversation.config.maxTokens,
-	});
+	const response = await hf.chatCompletion(
+		{
+			model: model.id,
+			messages,
+			temperature: conversation.config.temperature,
+			max_tokens: conversation.config.maxTokens,
+		},
+		{ use_cache: false }
+	);
 
 	if (response.choices && response.choices.length > 0) {
 		const { message } = response.choices[0];
```
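For reference, `use_cache` is a request option on the `@huggingface/inference` client: the Inference API caches responses for identical inputs by default, so without this flag a repeated prompt can come back with the same completion even at a nonzero `temperature`. Below is a minimal standalone sketch of the same call, assuming a valid access token; the token, model id, and prompt are placeholders, not values from the playground code:

```ts
import { HfInference } from "@huggingface/inference";

async function main() {
	// "hf_xxx" is a placeholder access token.
	const hf = new HfInference("hf_xxx");

	// The second argument is the client's options object; `use_cache: false`
	// asks the Inference API to skip its response cache, so repeating the
	// same request with temperature > 0 can yield a different completion.
	const response = await hf.chatCompletion(
		{
			model: "HuggingFaceH4/zephyr-7b-beta",
			messages: [{ role: "user", content: "Tell me a joke." }],
			temperature: 0.9,
			max_tokens: 100,
		},
		{ use_cache: false }
	);

	console.log(response.choices[0]?.message?.content);
}

main().catch(console.error);
```

The streaming path (`handleStreamingResponse`) passes the same option alongside the existing `signal`, so both request styles bypass the cache.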