ruslanmv committed on
Commit
f758f3f
·
verified ·
1 Parent(s): 03ee628

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -28
app.py CHANGED
@@ -27,36 +27,13 @@ def askme(symptoms, question):
27
  content = symptoms + " " + question
28
  messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": content}]
29
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
30
- # inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
31
- # outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
32
- # response_text = tokenizer.batch_decode(outputs)[0].strip()
33
-
34
-
35
-
36
- # Tokenize all prompts and batch them
37
- tokenized_inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(device)
38
- outputs = model.generate(**tokenized_inputs, max_new_tokens=200, use_cache=True)
39
-
40
- # Decode responses
41
- response_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
42
-
43
-
44
-
45
  # Remove system messages and content
46
- #response_text = response_text.replace(sys_message, "").replace(content, "").strip()
47
  # Extract only the assistant's response
48
- #assistant_response = response_text.split("<|im_start|>assistant")[1].strip().replace('<|im_end', '')
49
- # Extract only the assistant's response
50
-
51
-
52
-
53
-
54
- # Extract assistant's responses
55
- assistant_responses = []
56
- for response_text in response_texts:
57
- assistant_response = response_text.split("assistant")[1].strip().replace('<|im_end', '')
58
- assistant_responses.append(assistant_response)
59
-
60
  return assistant_responses
61
 
62
 
 
27
  content = symptoms + " " + question
28
  messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": content}]
29
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
30
+ inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
31
+ outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
32
+ response_text = tokenizer.batch_decode(outputs)[0].strip() #skip_special_tokens=True
 
 
 
 
 
 
 
 
 
 
 
 
33
  # Remove system messages and content
 
34
  # Extract only the assistant's response
35
+ assistant_response = response_text.split("<|im_start|>assistant")[1].strip().replace('<|im_end', '')
36
+ # Extract only the assistant's response
 
 
 
 
 
 
 
 
 
 
37
  return assistant_responses
38
 
39