DeepMount00 committed
Commit 6b8d341
1 Parent(s): 18f610c

Update app.py

Files changed (1):
  1. app.py +12 -8
app.py CHANGED
@@ -34,20 +34,24 @@ h1 {
 
 @spaces.GPU(duration=120)
 def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
-    # Creare la struttura della conversazione
-    conversation = []
+    # Initialize the conversation with a system prompt
+    conversation = [{"role": "system", "content": "Sei un assistente specializzato nella lingua italiana. Rispondi in modo preciso e dettagliato."}]
+
+    # Add historical conversation
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+
+    # Add the current user message to the conversation
     conversation.append({"role": "user", "content": message})
 
-    # Preparare gli input per il modello
+    # Prepare the input for the model
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
 
-    # Parametri per la generazione del testo
-    do_sample = True if temperature > 0 else False  # Usa il campionamento a meno che la temperatura non sia 0
-    real_temperature = max(temperature, 0.001)  # Evita temperatura 0 che disabilita il campionamento
+    # Parameters for generating text
+    do_sample = True if temperature > 0 else False  # Use sampling unless temperature is 0
+    real_temperature = max(temperature, 0.001)  # Avoid zero temperature which disables sampling
 
-    # Generare una risposta dal modello
+    # Generate a response from the model
     generated_ids = model.generate(
         input_ids=input_ids,
         max_new_tokens=max_new_tokens,
@@ -56,7 +60,7 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
         eos_token_id=tokenizer.eos_token_id
     )
 
-    # Decodificare i token generati
+    # Decode the generated tokens
     decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
     prompt_end_index = decoded[0].find(message) + len(message)
     final_response = decoded[0][prompt_end_index:] if prompt_end_index != -1 else decoded[0]
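
The diff assumes that `spaces`, `tokenizer`, and `model` are defined earlier in app.py, and the two `model.generate` arguments elided between the hunks presumably forward the sampling settings computed above. Below is a minimal sketch of that assumed surrounding setup; the model ID, dtype, and Gradio slider wiring are illustrative placeholders, not taken from this commit.

# Minimal sketch of the app.py context this diff assumes (placeholders marked).
import spaces                      # Hugging Face Spaces ZeroGPU helper
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "DeepMount00/..."       # placeholder: the actual model ID is not shown in this diff

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,    # assumed dtype
    device_map="auto",
)

# chat_llama3_8b from the diff above goes here.
# Its generate() call presumably also passes the two elided arguments:
#     do_sample=do_sample,
#     temperature=real_temperature,

# Hypothetical UI wiring: expose temperature and max_new_tokens as sliders,
# matching the function's extra parameters.
demo = gr.ChatInterface(
    fn=chat_llama3_8b,
    additional_inputs=[
        gr.Slider(0.0, 1.0, value=0.7, label="Temperature"),
        gr.Slider(64, 2048, value=512, step=64, label="Max new tokens"),
    ],
)

if __name__ == "__main__":
    demo.launch()

With this wiring, Gradio passes the chat history as (user, assistant) pairs, which is the format the patched function's `for user, assistant in history` loop expects.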