Spaces:

Do0rMaMu
/

LLaMa3-assistant

Sleeping

Do0rMaMu committed on May 24

Commit

7bb2e96

•

1 Parent(s): 465d646

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -13,13 +13,13 @@ llm = Llama(
 # Pydantic object for validation
 class Validation(BaseModel):
-    user_prompt: str  # User's input prompt
-    system_prompt: str  # System's guiding prompt
-    max_tokens: int = 1024,
-    temperature: float = 0.01,
-    top_p: float = 0.9,
-    repeat_penalty: float = 1.1,
-    top_k: int = 40
 # FastAPI application initialization
 app = FastAPI()
@@ -33,16 +33,7 @@ async def generate_response(item: Validation):
 { item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
     # Call the Llama model to generate a response
-    max_tokens = int(item.max_tokens)
-    temperature = float(item.temperature)
-    top_p = float(item.top_p)
-    repeat_penalty = float(item.repeat_penalty)  # Explicitly cast to float
-    top_k = int(item.top_k)
-    # Call the Llama model to generate a response
-    output = llm(prompt, max_tokens=max_tokens, temperature=temperature, top_p=top_p,
-                 repeat_penalty=repeat_penalty, top_k=top_k, echo=True)
     # Extract and return the text from the response
-    return output['choices'][0]['text']

 # Pydantic object for validation
 class Validation(BaseModel):
+    user_prompt: str
+    system_prompt: str
+    max_tokens = 1024
+    temperature = 0.001
+    top_p = 0.9
+    repeat_penalty = 1.1
+    top_k = 40
 # FastAPI application initialization
 app = FastAPI()
 { item.user_prompt }<|eot_id|> \n <|start_header_id|>assistant<|end_header_id|>"""
     # Call the Llama model to generate a response
+    output = llm(prompt, max_tokens = item.max_tokens,temperature = item.temperature , top_p = item.top_p , repeat_penalty = item.repeat_penalty, top_k = item.top_k ,echo=True)  # Update parameters as needed
     # Extract and return the text from the response
+    return output['choices'][0]['text']