fllay committed on
Commit
b88fc91
·
verified ·
1 Parent(s): 5a1c3b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -14
app.py CHANGED
@@ -17,27 +17,25 @@ model = AutoModelForCausalLM.from_pretrained(
17
  # --- Chat function ---
18
  def chat(message, history, max_new_tokens=128, temperature=0.7):
19
  try:
20
- # Convert history into Hugging Face messages format
21
  messages = []
22
  for user_msg, bot_msg in history:
23
  messages.append({"role": "user", "content": user_msg})
24
  messages.append({"role": "assistant", "content": bot_msg})
25
  messages.append({"role": "user", "content": message})
26
 
27
- # Prepare inputs with chat template return dictionary
28
- inputs = tokenizer.apply_chat_template(
29
  messages,
30
  add_generation_prompt=True,
31
  tokenize=True,
32
- return_tensors="pt",
33
- return_dict=True
34
- )
35
 
36
- # Move all tensors in the input dict to the model device
37
- for k in inputs:
38
- inputs[k] = inputs[k].to(model.device)
39
 
40
- # Generate model output
41
  outputs = model.generate(
42
  **inputs,
43
  max_new_tokens=max_new_tokens,
@@ -46,19 +44,18 @@ def chat(message, history, max_new_tokens=128, temperature=0.7):
46
  pad_token_id=tokenizer.eos_token_id
47
  )
48
 
49
- # Decode ONLY the newly generated tokens (past the input length)
50
  response = tokenizer.decode(
51
- outputs[0][inputs["input_ids"].shape[-1]:],
52
  skip_special_tokens=True
53
  ).strip()
54
 
55
- # Append to history
56
  history.append((message, response))
57
  return history, history, ""
58
 
59
  except Exception as e:
60
  import traceback
61
- traceback.print_exc() # will show in HF Space Logs
62
  return history + [(message, f"⚠️ Error: {str(e)}")], history, ""
63
 
64
 
 
17
  # --- Chat function ---
18
  def chat(message, history, max_new_tokens=128, temperature=0.7):
19
  try:
20
+ # Convert conversation history into messages
21
  messages = []
22
  for user_msg, bot_msg in history:
23
  messages.append({"role": "user", "content": user_msg})
24
  messages.append({"role": "assistant", "content": bot_msg})
25
  messages.append({"role": "user", "content": message})
26
 
27
+ # Apply chat template -> returns tensor of input_ids
28
+ input_ids = tokenizer.apply_chat_template(
29
  messages,
30
  add_generation_prompt=True,
31
  tokenize=True,
32
+ return_tensors="pt"
33
+ ).to(model.device)
 
34
 
35
+ # Wrap as dict so generate(**inputs) works
36
+ inputs = {"input_ids": input_ids}
 
37
 
38
+ # Generate output
39
  outputs = model.generate(
40
  **inputs,
41
  max_new_tokens=max_new_tokens,
 
44
  pad_token_id=tokenizer.eos_token_id
45
  )
46
 
47
+ # Decode new tokens only
48
  response = tokenizer.decode(
49
+ outputs[0][input_ids.shape[-1]:],
50
  skip_special_tokens=True
51
  ).strip()
52
 
 
53
  history.append((message, response))
54
  return history, history, ""
55
 
56
  except Exception as e:
57
  import traceback
58
+ traceback.print_exc()
59
  return history + [(message, f"⚠️ Error: {str(e)}")], history, ""
60
 
61