Spaces:
Running
on
Zero
Running
on
Zero
NGUYEN, Xuan Phi
committed on
Commit
·
4a01c79
1
Parent(s):
3b65eaa
update
Browse files
multipurpose_chatbot/configs.py
CHANGED
@@ -184,7 +184,7 @@ CHUNK_OVERLAP = int(os.environ.get("CHUNK_SIZE", "50"))
|
|
184 |
|
185 |
|
186 |
DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful, honest and safe AI assistant."""
|
187 |
-
DEFAULT_SYSTEM_PROMPT = """You are SeaLLM, you are a helpful, respectful and honest AI assistant. Based on your internal clock, the current date time
|
188 |
|
189 |
Your knowledge base was last updated on August 2023. Thus, you should answer questions about events prior to and after August 2023 the way a highly informed individual in August 2023 would if they were talking to someone from the above date, and can let the human know this when relevant.
|
190 |
|
|
|
184 |
|
185 |
|
186 |
DEFAULT_SYSTEM_PROMPT = """You are a helpful, respectful, honest and safe AI assistant."""
|
187 |
+
DEFAULT_SYSTEM_PROMPT = """You are SeaLLM, you are a helpful, respectful and honest AI assistant. Based on your internal clock, the current date time is {cur_datetime}.
|
188 |
|
189 |
Your knowledge base was last updated on August 2023. Thus, you should answer questions about events prior to and after August 2023 the way a highly informed individual in August 2023 would if they were talking to someone from the above date, and can let the human know this when relevant.
|
190 |
|
multipurpose_chatbot/demos/chat_interface.py
CHANGED
@@ -110,7 +110,7 @@ def get_datetime_string():
|
|
110 |
# tz_string = datetime.now().astimezone()
|
111 |
# dt_string = now.strftime("%B %d, %Y, %H:%M:%S")
|
112 |
# dt_string = datetime.now().astimezone().strftime("%B %d, %Y, %H:%M GMT%Z")
|
113 |
-
dt_string = datetime.now().
|
114 |
return dt_string
|
115 |
|
116 |
|
|
|
110 |
# tz_string = datetime.now().astimezone()
|
111 |
# dt_string = now.strftime("%B %d, %Y, %H:%M:%S")
|
112 |
# dt_string = datetime.now().astimezone().strftime("%B %d, %Y, %H:%M GMT%Z")
|
113 |
+
dt_string = datetime.now().strftime("%B %d, %Y")
|
114 |
return dt_string
|
115 |
|
116 |
|
multipurpose_chatbot/engines/transformers_engine.py
CHANGED
@@ -550,40 +550,47 @@ class TransformersEngine(BaseEngine):
|
|
550 |
self._model.sample = types.MethodType(NewGenerationMixin.sample_stream, self._model)
|
551 |
|
552 |
self.maybe_raise_safety(prompt)
|
553 |
-
|
554 |
-
with torch.no_grad():
|
555 |
-
inputs = self.tokenizer(prompt, return_tensors='pt')
|
556 |
-
num_tokens = inputs.input_ids.size(1)
|
557 |
|
558 |
-
|
|
|
|
|
|
|
559 |
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
567 |
|
568 |
-
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
|
|
|
|
|
550 |
self._model.sample = types.MethodType(NewGenerationMixin.sample_stream, self._model)
|
551 |
|
552 |
self.maybe_raise_safety(prompt)
|
|
|
|
|
|
|
|
|
553 |
|
554 |
+
if temperature == 0:
|
555 |
+
temperature = 0.0001
|
556 |
+
|
557 |
+
try:
|
558 |
|
559 |
+
with torch.no_grad():
|
560 |
+
inputs = self.tokenizer(prompt, return_tensors='pt')
|
561 |
+
num_tokens = inputs.input_ids.size(1)
|
562 |
+
|
563 |
+
inputs = inputs.to(self._model.device)
|
564 |
+
|
565 |
+
generator = self._model.generate(
|
566 |
+
**inputs,
|
567 |
+
do_sample=True,
|
568 |
+
temperature=temperature,
|
569 |
+
max_new_tokens=max_tokens,
|
570 |
+
pad_token_id=self.tokenizer.pad_token_id,
|
571 |
+
)
|
572 |
|
573 |
+
out_tokens = []
|
574 |
+
response = None
|
575 |
+
for index, token in enumerate(generator):
|
576 |
+
out_tokens.extend(token.tolist())
|
577 |
+
response = self.tokenizer.decode(out_tokens)
|
578 |
+
if "<|im_start|>assistant\n" in response:
|
579 |
+
response = response.split("<|im_start|>assistant\n")[-1]
|
580 |
+
num_tokens += 1
|
581 |
+
# print(f"{response}", end='\r')
|
582 |
+
# sys.stdout.flush()
|
583 |
+
self.maybe_raise_safety(response, gen_index=index)
|
584 |
+
yield response, num_tokens
|
585 |
+
|
586 |
+
del generator
|
587 |
+
if response is not None:
|
588 |
+
if "<|im_start|>assistant\n" in response:
|
589 |
+
response = response.split("<|im_start|>assistant\n")[-1]
|
590 |
+
|
591 |
+
self.maybe_raise_safety(response)
|
592 |
+
full_text = prompt + response
|
593 |
+
num_tokens = len(self.tokenizer.encode(full_text))
|
594 |
+
yield response, num_tokens
|
595 |
+
except RuntimeError as e:
|
596 |
+
raise gr.Error(str(e))
|