jbilcke-hf (HF staff) committed
Commit 6c8acb9
Parent: 610725e

simplified parallelism system

Files changed (1)
src/index.mts  +5 -17
src/index.mts CHANGED
@@ -39,11 +39,9 @@ const maxParallelRequests = 1
 const pending: {
   total: number;
   queue: string[];
-  aborts: Record<string, any>,
 } = {
   total: 0,
   queue: [],
-  aborts: {},
 }
 
 const endRequest = (id: string, reason: string) => {
@@ -51,20 +49,7 @@ const endRequest = (id: string, reason: string) => {
     return
   }
 
-  // politely ask the LLM to stop
-  try {
-    pending.aborts[id].abort()
-  } catch (err) {
-    console.log(`could not abort request ${id} (${err})`)
-  }
-  // remove the request from everywhere
-  try {
-    pending.queue = pending.queue.filter(i => i !== id)
-    delete pending.aborts[id]
-    console.log(`cleaned up request ${id}`)
-  } catch (err) {
-    console.log(`failed to properly clean up request ${id}`)
-  }
+  pending.queue = pending.queue.filter(i => i !== id)
   console.log(`request ${id} ended (${reason})`)
 }
 
@@ -93,7 +78,6 @@ app.get("/", async (req, res) => {
   console.log(`new request ${id}`)
 
   pending.queue.push(id)
-  pending.aborts[id] = new AbortController()
 
   const prefix = `<html><head>${css}${script}`
   res.write(prefix)
@@ -125,7 +109,11 @@ ${prefix}`
   const inputTokens = await llm.tokenize(finalPrompt)
   console.log("initializing the generator (may take 30s or more)")
   const generator = await llm.generate(inputTokens)
+
   for await (const token of generator) {
+    if (!pending.queue.includes(id)) {
+      break
+    }
     const tmp = await llm.detokenize(token)
     process.stdout.write(tmp)
     res.write(tmp)
 
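For readers skimming the diff: after this commit there is no per-request AbortController anymore. Ending a request simply removes its id from pending.queue, and the token-streaming loop checks pending.queue.includes(id) before writing each token, breaking out as soon as the id is gone. Below is a minimal, self-contained TypeScript sketch of that pattern; the fakeTokenStream generator, the handleRequest wrapper, and the "demo" id are illustrative stand-ins rather than code from this repository (in src/index.mts the tokens come from llm.generate() inside the Express handler).

// Minimal sketch of the queue-based cancellation this commit switches to.
// Assumptions: Node.js/TypeScript; fakeTokenStream stands in for llm.generate().

const pending: { total: number; queue: string[] } = { total: 0, queue: [] }

const endRequest = (id: string, reason: string) => {
  // removing the id from the queue is the only cancellation signal
  pending.queue = pending.queue.filter(i => i !== id)
  console.log(`request ${id} ended (${reason})`)
}

// hypothetical token source; the real project streams tokens from the LLM
async function* fakeTokenStream() {
  for (let i = 0; i < 50; i++) {
    await new Promise(resolve => setTimeout(resolve, 100))
    yield `token-${i} `
  }
}

const handleRequest = async (id: string) => {
  pending.queue.push(id)
  for await (const token of fakeTokenStream()) {
    // cooperative cancellation: stop at the next token boundary
    // once endRequest() has dropped the id from the queue
    if (!pending.queue.includes(id)) {
      break
    }
    process.stdout.write(token)
  }
  console.log(`\nstreaming finished for ${id}`)
}

// usage: start a request, then end it after two seconds
handleRequest("demo").catch(console.error)
setTimeout(() => endRequest("demo", "client disconnected"), 2000)

Compared with the removed AbortController path, this stops generation only at the next token boundary rather than interrupting the generator itself, but it reduces the bookkeeping to a single queue array, which is what accounts for the commit's +5/-17 line count.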