artificialguybr committed on
Commit
8312b78
1 Parent(s): a85a91b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -19
app.py CHANGED
@@ -34,29 +34,25 @@ def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetit
34
  # A última mensagem do usuário
35
  user_prompt = history[-1][0]
36
 
37
- # Definindo o template e o prompt
38
- prompt_template = f'''system
39
- {system_message.strip()}
40
- user
41
- {user_prompt}
42
- assistant
43
- '''
44
 
45
- # Preparando o input
46
- input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids # .cuda() se você estiver usando GPU
47
 
48
- # Gerar a saída
49
- output = model.generate(input_ids=input_ids, temperature=temperature, do_sample=True, top_p=top_p, top_k=top_k, max_length=max_tokens)
50
- print("Output:")
51
- print(output)
52
- # Decodificar a saída
53
  decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
54
- print("Decoded_Output:")
55
- print(decoded_output)
56
- # Atualizar o histórico
57
- history[-1][1] += decoded_output
58
 
59
- yield history, history, ""
 
 
 
60
 
61
  start_message = ""
62
 
 
34
  # A última mensagem do usuário
35
  user_prompt = history[-1][0]
36
 
37
+ # Prepare the messages
38
+ messages = [
39
+ {"role": "system", "content": system_message.strip()},
40
+ {"role": "user", "content": user_prompt}
41
+ ]
 
 
42
 
43
+ # Apply the chat template
44
+ gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
45
 
46
+ # Generate the output
47
+ output = model.generate(input_ids=gen_input.input_ids, temperature=temperature, do_sample=True, top_p=top_p, top_k=top_k, max_length=max_tokens)
48
+
49
+ # Decode the output
 
50
  decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
 
 
 
 
51
 
52
+ # Update the history
53
+ history[-1][1] += decoded_output.split('\n')[-1] # Only take the assistant's last response
54
+
55
+ return history, history, ""
56
 
57
  start_message = ""
58