Ankush Rana committed on
Commit dc8f8ea · 1 Parent(s): bbc05f7

add stop ids

Files changed (1)
  1. rag.py  +4 -5
rag.py CHANGED
@@ -44,7 +44,6 @@ class RAG:
 
         query = f"Context:\n{context}\n\nQuestion:\n{instruction}"
         #sys_prompt = "You are a helpful assistant. Answer the question using only the context you are provided with. If it is not possible to do it with the context, just say 'I can't answer'. <|endoftext|>"
-
         chat_completion = client.chat.completions.create(
             model="tgi",
             messages=[
@@ -53,11 +52,11 @@ class RAG:
             ],
             max_tokens=model_parameters['max_new_tokens'], # TODO: map other parameters
             frequency_penalty=model_parameters['repetition_penalty'], # this doesn't appear to do much, not a replacement for repetition penalty
-            #repetition_penalty=model_parameters['repetition_penalty'], # TODO: figure out how to pass repetition penalty
-            extra_body={'repetition_penalty': model_parameters['repetition_penalty']},
-            stream=False
+            # presence_penalty=model_parameters['repetition_penalty'],
+            # extra_body=model_parameters,
+            stream=False,
+            stop=["<|im_end|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token|>"]
         )
-        print(chat_completion)
         return(chat_completion.choices[0].message.content)
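For reference, a minimal self-contained sketch of the call this commit converges on: an OpenAI-compatible client pointed at a TGI endpoint (model="tgi") with the newly added stop sequences. The base URL, API key, prompt text, and parameter values below are illustrative assumptions, not taken from the repository.

# Minimal sketch (assumptions: a local TGI server exposing the OpenAI-compatible
# /v1 API; URL, key, prompt, and parameter values are illustrative placeholders).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="-")  # TGI does not check the key

model_parameters = {"max_new_tokens": 256, "repetition_penalty": 1.1}  # example values

chat_completion = client.chat.completions.create(
    model="tgi",
    messages=[
        {"role": "system", "content": "Answer using only the provided context."},
        {"role": "user", "content": "Context:\n...\n\nQuestion:\n..."},
    ],
    max_tokens=model_parameters["max_new_tokens"],
    frequency_penalty=model_parameters["repetition_penalty"],
    # The earlier revision passed TGI's native repetition_penalty through extra_body:
    # extra_body={"repetition_penalty": model_parameters["repetition_penalty"]},
    stream=False,
    # What this commit adds: stop generation at common chat-template end tokens.
    stop=["<|im_end|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token|>"],
)
print(chat_completion.choices[0].message.content)

The stop list mixes end-of-turn markers from different chat templates (ChatML's <|im_end|>, Llama 3's <|eot_id|> and <|end_header_id|>), so the same wrapper can sit in front of either model family without leaking template tokens into the returned answer.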