DeepMount00 committed
Commit 6b8d341
1 Parent(s): 18f610c

Update app.py

Files changed (1):
  1. app.py +12 -8
app.py CHANGED
@@ -34,20 +34,24 @@ h1 {
 
 @spaces.GPU(duration=120)
 def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
-    # Creare la struttura della conversazione
-    conversation = []
+    # Initialize the conversation with a system prompt
+    conversation = [{"role": "system", "content": "Sei un assistente specializzato nella lingua italiana. Rispondi in modo preciso e dettagliato."}]
+
+    # Add historical conversation
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+
+    # Add the current user message to the conversation
     conversation.append({"role": "user", "content": message})
 
-    # Preparare gli input per il modello
+    # Prepare the input for the model
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
 
-    # Parametri per la generazione del testo
-    do_sample = True if temperature > 0 else False  # Usa il campionamento a meno che la temperatura non sia 0
-    real_temperature = max(temperature, 0.001)  # Evita temperatura 0 che disabilita il campionamento
+    # Parameters for generating text
+    do_sample = True if temperature > 0 else False  # Use sampling unless temperature is 0
+    real_temperature = max(temperature, 0.001)  # Avoid zero temperature which disables sampling
 
-    # Generare una risposta dal modello
+    # Generate a response from the model
     generated_ids = model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
@@ -56,7 +60,7 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
         eos_token_id=tokenizer.eos_token_id
     )
 
-    # Decodificare i token generati
+    # Decode the generated tokens
     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     prompt_end_index = decoded[0].find(message) + len(message)
     final_response = decoded[0][prompt_end_index:] if prompt_end_index != -1 else decoded[0]
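
The diff assumes that `spaces`, `tokenizer`, and `model` are defined earlier in app.py, and the two `model.generate` arguments elided between the hunks presumably forward the sampling settings computed above. Below is a minimal sketch of that assumed surrounding setup; the model ID, dtype, and Gradio slider wiring are illustrative placeholders, not taken from this commit.

# Minimal sketch of the app.py context this diff assumes (placeholders marked).
import spaces                      # Hugging Face Spaces ZeroGPU helper
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "DeepMount00/..."       # placeholder: the actual model ID is not shown in this diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,    # assumed dtype
    device_map="auto",
)

# chat_llama3_8b from the diff above goes here.
# Its generate() call presumably also passes the two elided arguments:
#     do_sample=do_sample,
#     temperature=real_temperature,

# Hypothetical UI wiring: expose temperature and max_new_tokens as sliders,
# matching the function's extra parameters.
demo = gr.ChatInterface(
    fn=chat_llama3_8b,
    additional_inputs=[
        gr.Slider(0.0, 1.0, value=0.7, label="Temperature"),
        gr.Slider(64, 2048, value=512, step=64, label="Max new tokens"),
    ],
)

if __name__ == "__main__":
    demo.launch()

With this wiring, Gradio passes the chat history as (user, assistant) pairs, which is the format the patched function's `for user, assistant in history` loop expects.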