nxphi47 committed on
Commit 3eeebb2
1 Parent(s): fb10dcf

Update multipurpose_chatbot/engines/transformers_engine.py

multipurpose_chatbot/engines/transformers_engine.py CHANGED
@@ -397,6 +397,109 @@ class NewGenerationMixin(GenerationMixin):
 
 
 
+
+from ..configs import (
+    STREAM_CHECK_MULTIPLE,
+    STREAM_YIELD_MULTIPLE,
+)
+
+
+BLOCK_LANGS = str(os.environ.get("BLOCK_LANGS", ""))
+BLOCK_LANGS = [x.strip() for x in BLOCK_LANGS.strip().split(";")] if len(BLOCK_LANGS.strip()) > 0 else []
+LANG_BLOCK_HISTORY = bool(int(os.environ.get("LANG_BLOCK_HISTORY", "0")))
+KEYWORDS = os.environ.get("KEYWORDS", "").strip()
+KEYWORDS = KEYWORDS.split(";") if len(KEYWORDS) > 0 else []
+KEYWORDS = [x.lower() for x in KEYWORDS]
+
+LANG_BLOCK_MESSAGE = """Unsupported language."""
+
+KEYWORD_BLOCK_MESSAGE = "Invalid request."
+
+
+def _detect_lang(text):
+    # Disable languages that may carry a safety risk
+    from langdetect import detect as detect_lang
+    dlang = None
+    try:
+        dlang = detect_lang(text)
+    except Exception as e:
+        if "No features in text." in str(e):
+            return "en"
+        else:
+            return "zh"
+    return dlang
+
+
+def block_lang(
+    message: str,
+    history: List[Tuple[str, str]] = None,
+) -> bool:
+    # relieve history-based blocking
+    if len(BLOCK_LANGS) == 0:
+        return False
+
+    if LANG_BLOCK_HISTORY and history is not None and any((LANG_BLOCK_MESSAGE in x[1].strip()) for x in history):
+        return True
+    else:
+        _lang = _detect_lang(message)
+        if _lang in BLOCK_LANGS:
+            # print(f'Detect blocked {_lang}: {message}')
+            return True
+        else:
+            return False
+
+def safety_check(text, history=None) -> Optional[str]:
+    """
+    Despite our efforts in safety tuning and red teaming, our models may still generate harmful or illegal content.
+    This provides an additional security measure to enhance safety and compliance with local regulations.
+    """
+    if len(KEYWORDS) > 0 and any(x in text.lower() for x in KEYWORDS):
+        return KEYWORD_BLOCK_MESSAGE
+
+    if len(BLOCK_LANGS) > 0:
+        if block_lang(text, history):
+            return LANG_BLOCK_MESSAGE
+
+    return None
+
+
+def safety_check_conversation_string(text, delimiter=None) -> Optional[str]:
+    if len(KEYWORDS) > 0 and any(x in text.lower() for x in KEYWORDS):
+        return KEYWORD_BLOCK_MESSAGE
+    if len(BLOCK_LANGS) > 0:
+        import re
+        delimiter = delimiter or (r"</s><\|im_start\|>user\n", r"</s><\|im_start\|>assistant\n", r"<\|im_start\|>system\n")
+        turns = re.split(r"|".join(delimiter), text)
+        turns = [t for t in turns if t.strip() != '']
+        for t in turns:
+            if block_lang(t):
+                return LANG_BLOCK_MESSAGE
+    return None
+
+
+def is_check_safety():
+    return len(KEYWORDS) > 0 or len(BLOCK_LANGS) > 0
+
+
+def safety_check_conversation(conversation) -> Optional[str]:
+    """
+    Despite our efforts in safety tuning and red teaming, our models may still generate harmful or illegal content.
+    This provides an additional security measure to enhance safety and compliance with local regulations.
+    """
+    texts = [c['content'] for c in conversation]
+    for text in texts:
+        if len(KEYWORDS) > 0 and any(x in text.lower() for x in KEYWORDS):
+            return KEYWORD_BLOCK_MESSAGE
+
+        if len(BLOCK_LANGS) > 0:
+            if block_lang(text):
+                return LANG_BLOCK_MESSAGE
+    return None
+
+
+
+
+
 class TransformersEngine(BaseEngine):
     @property
     def max_position_embeddings(self) -> int:
@@ -424,6 +527,18 @@ class TransformersEngine(BaseEngine):
         print(self._model)
         print(f"{self.max_position_embeddings=}")
 
+    def maybe_raise_safety(self, message, gen_index=-1):
+        if is_check_safety():
+            if gen_index < 0:
+                message_safety = safety_check_conversation_string(message)
+                if message_safety is not None:
+                    raise gr.Error(message_safety)
+            else:
+                if STREAM_CHECK_MULTIPLE > 0 and gen_index % STREAM_CHECK_MULTIPLE == 0:
+                    message_safety = safety_check_conversation_string(message)
+                    if message_safety is not None:
+                        raise gr.Error(message_safety)
+
     @spaces.GPU
     def generate_yield_string(self, prompt, temperature, max_tokens, stop_strings: Optional[Tuple[str]] = None, **kwargs):
 
@@ -431,6 +546,9 @@
         import sys
         # self._model._sample = types.MethodType(NewGenerationMixin.sample_stream, self._model)
         self._model.sample = types.MethodType(NewGenerationMixin.sample_stream, self._model)
+
+        self.maybe_raise_safety(prompt)
+
        with torch.no_grad():
            inputs = self.tokenizer(prompt, return_tensors='pt')
            num_tokens = inputs.input_ids.size(1)
@@ -447,7 +565,7 @@
 
             out_tokens = []
             response = None
-            for token in generator:
+            for index, token in enumerate(generator):
                 out_tokens.extend(token.tolist())
                 response = self.tokenizer.decode(out_tokens)
                 if "<|im_start|>assistant\n" in response:
@@ -455,11 +573,15 @@
                 num_tokens += 1
                 # print(f"{response}", end='\r')
                 # sys.stdout.flush()
+                self.maybe_raise_safety(response, gen_index=index)
                 yield response, num_tokens
-
+
+            del generator
             if response is not None:
                 if "<|im_start|>assistant\n" in response:
                     response = response.split("<|im_start|>assistant\n")[-1]
+
+                self.maybe_raise_safety(response)
                 full_text = prompt + response
                 num_tokens = len(self.tokenizer.encode(full_text))
                 yield response, num_tokens
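
The guard layer added by this commit is configured entirely through environment variables that are read once, at module import time. The sketch below is illustrative only and not part of the commit: the variable values are made up, and it assumes the module is importable as multipurpose_chatbot.engines.transformers_engine.

    # Hedged usage sketch: set the blocklists BEFORE importing the engine,
    # because BLOCK_LANGS, KEYWORDS and LANG_BLOCK_HISTORY are read at import time.
    import os

    os.environ["BLOCK_LANGS"] = "vi;th"            # hypothetical langdetect codes to block
    os.environ["KEYWORDS"] = "badword1;badword2"   # hypothetical keyword blocklist
    os.environ["LANG_BLOCK_HISTORY"] = "0"         # do not propagate blocks via chat history

    from multipurpose_chatbot.engines.transformers_engine import (
        is_check_safety,
        safety_check,
    )

    assert is_check_safety()               # True once either blocklist is non-empty
    print(safety_check("hello badword1"))  # -> "Invalid request." (keyword hit)
    print(safety_check("hello world"))     # -> None (English is not in BLOCK_LANGS)

In the streaming path, maybe_raise_safety(response, gen_index=index) re-checks the partial response only when index is a multiple of STREAM_CHECK_MULTIPLE (and not at all when that value is 0 or negative), which keeps the per-token cost of keyword matching and langdetect bounded.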