radames committed on
Commit
c0a0d79
1 Parent(s): dc843c4

fix first token

Browse files
Files changed (1) hide show
  1. llama2cWorker.js +9 -2
llama2cWorker.js CHANGED
@@ -50,6 +50,7 @@ async function generate(data) {
50
  tokenizerURL,
51
  prompt,
52
  temp,
 
53
  repeatPenalty,
54
  seed,
55
  maxSeqLen,
@@ -59,11 +60,17 @@ async function generate(data) {
59
  const model = await Llama2C.getInstance(weightsURL, modelID, tokenizerURL);
60
 
61
  self.postMessage({ status: "loading", message: "Initializing model" });
62
- model.init_with_prompt(prompt, temp, repeatPenalty, seed);
 
 
 
 
 
 
63
 
64
  const seq_len = model.get_seq_len();
65
 
66
- let sentence = "";
67
  let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
68
  let startTime = performance.now();
69
  let tokensCount = 0;
 
50
  tokenizerURL,
51
  prompt,
52
  temp,
53
+ top_p,
54
  repeatPenalty,
55
  seed,
56
  maxSeqLen,
 
60
  const model = await Llama2C.getInstance(weightsURL, modelID, tokenizerURL);
61
 
62
  self.postMessage({ status: "loading", message: "Initializing model" });
63
+ const firstToken = model.init_with_prompt(
64
+ prompt,
65
+ temp,
66
+ top_p,
67
+ repeatPenalty,
68
+ seed
69
+ );
70
 
71
  const seq_len = model.get_seq_len();
72
 
73
+ let sentence = firstToken;
74
  let maxTokens = maxSeqLen ? maxSeqLen : seq_len - prompt.length - 1;
75
  let startTime = performance.now();
76
  let tokensCount = 0;