Marroco93 committed on
Commit
f2b775d
1 Parent(s): bc5e3f5
Files changed (1) hide show
  1. main.py +12 -8
main.py CHANGED
@@ -10,8 +10,8 @@ import torch
10
 
11
  app = FastAPI()
12
 
13
- # Initialize the InferenceClient with your model
14
- client = InferenceClient("meta-llama/Llama-2-7b-chat")
15
 
16
  class Item(BaseModel):
17
  prompt: str
@@ -23,12 +23,16 @@ class Item(BaseModel):
23
  repetition_penalty: float = 1.0
24
 
25
  def format_prompt(message, history):
26
- # Simple structure: alternating lines of dialogue, no special tokens unless specified by the model documentation
27
- conversation = ""
28
- for user_prompt, bot_response in history:
29
- conversation += f"User: {user_prompt}\nBot: {bot_response}\n"
30
- conversation += f"User: {message}"
31
- return conversation
 
 
 
 
32
 
33
 
34
 
 
10
 
11
  app = FastAPI()
12
 
13
+ # Initialize the InferenceClient with the Gemma-7b model
14
+ client = InferenceClient("google/gemma-7b")
15
 
16
  class Item(BaseModel):
17
  prompt: str
 
23
  repetition_penalty: float = 1.0
24
 
25
def format_prompt(message, history):
    """Build a Gemma-style chat prompt from a conversation history.

    Parameters:
        message: the current user message (str).
        history: iterable of {"role": ..., "content": ...} dicts, oldest
            first; any role other than "user" is collapsed to "model".

    Returns:
        A single prompt string using Gemma's <start_of_turn>/<end_of_turn>
        control tokens, ending with an open "model" turn so the model
        generates the next reply.
    """
    # NOTE(review): many inference backends prepend BOS themselves —
    # confirm the endpoint does not double-add <bos>.
    prompt = "<bos>"
    if history:
        for entry in history:
            # Collapse every non-user role (assistant/bot/system) to "model".
            role = "user" if entry['role'] == "user" else "model"
            # Gemma's template requires a newline after <end_of_turn>
            # before the next <start_of_turn> (fix: original omitted it).
            prompt += f"<start_of_turn>{role}\n{entry['content']}<end_of_turn>\n"
    # Open the current user turn, then leave a "model" turn open for generation.
    prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
    return prompt
35
+
36
 
37
 
38