Do0rMaMu committed on
Commit
a35a5a3
1 Parent(s): 7bb2e96

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +3 -6
main.py CHANGED
@@ -15,11 +15,8 @@ llm = Llama(
15
  class Validation(BaseModel):
16
  user_prompt: str
17
  system_prompt: str
18
- max_tokens = 1024
19
- temperature = 0.001
20
- top_p = 0.9
21
- repeat_penalty = 1.1
22
- top_k = 40
23
 
24
  # FastAPI application initialization
25
  app = FastAPI()
@@ -33,7 +30,7 @@ async def generate_response(item: Validation):
33
  { item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
34
 
35
  # Call the Llama model to generate a response
36
- output = llm(prompt, max_tokens = item.max_tokens,temperature = item.temperature , top_p = item.top_p , repeat_penalty = item.repeat_penalty, top_k = item.top_k ,echo=True) # Update parameters as needed
37
 
38
  # Extract and return the text from the response
39
  return output['choices'][0]['text']
 
15
  class Validation(BaseModel):
16
  user_prompt: str
17
  system_prompt: str
18
+ max_tokens: int = 1024
19
+ temperature: float = 0.01
 
 
 
20
 
21
  # FastAPI application initialization
22
  app = FastAPI()
 
30
  { item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
31
 
32
  # Call the Llama model to generate a response
33
+ output = llm(prompt, max_tokens = item.max_tokens,temperature = item.temperature, echo=True)
34
 
35
  # Extract and return the text from the response
36
  return output['choices'][0]['text']