Commit
·
d9b3577
1
Parent(s):
6b225ba
try to boost CPU usage
Browse files- public/index.html +1 -1
- src/index.mts +3 -3
public/index.html
CHANGED
|
@@ -39,7 +39,7 @@
|
|
| 39 |
</button>
|
| 40 |
<span class="py-3" x-show="state === 'loading'">Waiting for the stream to begin (might take a few minutes)..</span>
|
| 41 |
<span class="py-3" x-show="state === 'streaming'">
|
| 42 |
-
Streamed <span x-text="humanFileSize(size, true, 2)"></span> so far<br/> (hang on, this may take
|
| 43 |
</div>
|
| 44 |
</div>
|
| 45 |
</div>
|
|
|
|
| 39 |
</button>
|
| 40 |
<span class="py-3" x-show="state === 'loading'">Waiting for the stream to begin (might take a few minutes)..</span>
|
| 41 |
<span class="py-3" x-show="state === 'streaming'">
|
| 42 |
+
Streamed <span x-text="humanFileSize(size, true, 2)"></span> so far<br/> (hang on, this may take 5-15 minutes ☕)</span>
|
| 43 |
</div>
|
| 44 |
</div>
|
| 45 |
</div>
|
src/index.mts
CHANGED
|
@@ -37,7 +37,7 @@ const app = express()
|
|
| 37 |
const port = 7860
|
| 38 |
|
| 39 |
const minPromptSize = 16 // if you change this, you will need to also change in public/index.html
|
| 40 |
-
const timeoutInSec =
|
| 41 |
|
| 42 |
app.use(express.static("public"))
|
| 43 |
|
|
@@ -93,7 +93,7 @@ app.get("/app", async (req, res) => {
|
|
| 93 |
|
| 94 |
// naive implementation: we say we are out of capacity
|
| 95 |
if (pending.queue.length >= maxParallelRequests) {
|
| 96 |
-
res.write('Sorry, max nb of parallel requests reached. A new slot should be available in <
|
| 97 |
res.end()
|
| 98 |
return
|
| 99 |
}
|
|
@@ -139,7 +139,7 @@ ${prefix}`
|
|
| 139 |
|
| 140 |
const options = {
|
| 141 |
prompt: finalPrompt,
|
| 142 |
-
nThreads:
|
| 143 |
nTokPredict: 1024,
|
| 144 |
topK: 40,
|
| 145 |
topP: 0.1,
|
|
|
|
| 37 |
const port = 7860
|
| 38 |
|
| 39 |
const minPromptSize = 16 // if you change this, you will need to also change in public/index.html
|
| 40 |
+
const timeoutInSec = 15 * 60
|
| 41 |
|
| 42 |
app.use(express.static("public"))
|
| 43 |
|
|
|
|
| 93 |
|
| 94 |
// naive implementation: we say we are out of capacity
|
| 95 |
if (pending.queue.length >= maxParallelRequests) {
|
| 96 |
+
res.write('Sorry, max nb of parallel requests reached. A new slot should be available in < 15 min.')
|
| 97 |
res.end()
|
| 98 |
return
|
| 99 |
}
|
|
|
|
| 139 |
|
| 140 |
const options = {
|
| 141 |
prompt: finalPrompt,
|
| 142 |
+
nThreads: 6, // try to use the most of our vCPUs
|
| 143 |
nTokPredict: 1024,
|
| 144 |
topK: 40,
|
| 145 |
topP: 0.1,
|