Update app.py
app.py
CHANGED
@@ -24,35 +24,15 @@ model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
     torch_dtype=torch.bfloat16,
     load_in_4bit=True  # For efficient inference, if supported by the GPU card
 )
-model =
+model = BetterTransformer.transform(model)
+
 
 # Class to encapsulate the Falcon chatbot
 class FalconChatBot:
     def __init__(self, system_prompt="Le dialogue suivant est une conversation"):
         self.system_prompt = system_prompt
 
-    def process_history(self, history):
-        if history is None:
-            return []
-
-        # Ensure that history is a list of dictionaries
-        if not isinstance(history, list):
-            return []
-
-        # Filter out special commands from the history
-        filtered_history = []
-        for message in history:
-            if isinstance(message, dict):
-                user_message = message.get("user", "")
-                assistant_message = message.get("assistant", "")
-                # Check if the user_message is not a special command
-                if not user_message.startswith("Protagoniste:"):
-                    filtered_history.append({"user": user_message, "assistant": assistant_message})
-        return filtered_history
-
-    def predict(self, user_message, assistant_message, history, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
-        # Process the history to remove special commands
-        processed_history = self.process_history(history)
+    def predict(self, user_message, assistant_message, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
         # Combine the user and assistant messages into a conversation
         conversation = f"{self.system_prompt} {assistant_message if assistant_message else ''} {user_message} "
         # Encode the conversation using the tokenizer
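For context, the setup this hunk completes presumably looks like the sketch below. The BetterTransformer import (from Hugging Face's optimum package) and the exact model_name value are not shown in the diff, so both are assumptions here; model_name is guessed from the Space description.

# Sketch of the surrounding setup; the optimum import and model_name are assumed.
import torch
import transformers
from optimum.bettertransformer import BetterTransformer

model_name = "OpenLLM-France/Claire-7B-0.1"  # assumed from the Space description

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    load_in_4bit=True  # requires the bitsandbytes package and a supported GPU
)
# The fix this hunk lands: the dangling `model =` becomes a BetterTransformer
# wrap, which swaps in fused attention kernels for faster inference.
model = BetterTransformer.transform(model)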
@@ -61,7 +41,6 @@ class FalconChatBot:
         # Generate a response using the Falcon model
         response = model.generate(
             input_ids=input_ids,
-            # max_length=max_length,
             use_cache=False,
             early_stopping=False,
             bos_token_id=model.config.bos_token_id,
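The body of the encoding step (old lines 59-60, new 39-40) sits between this hunk and the previous one and is not shown in the diff; a common pattern, offered purely as an assumption, is:

# Assumed shape of the elided encoding step (not visible in this diff):
input_ids = tokenizer(conversation, return_tensors="pt").input_ids.to(model.device)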
@@ -72,14 +51,10 @@ class FalconChatBot:
             max_new_tokens=max_new_tokens,
             top_p=top_p,
             repetition_penalty=repetition_penalty
-        )
-
+        )
         # Decode the generated response to text
         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
-
-        updated_history = processed_history + [{"user": user_message, "assistant": response_text}]
-        return response_text, updated_history
-
+        return response_text
 
 # Create the Falcon chatbot instance
 falcon_bot = FalconChatBot()
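Since predict now returns a plain string rather than a (response_text, updated_history) tuple, callers simplify accordingly. A hypothetical call using the first example row:

# Hypothetical usage after this change; predict no longer takes or returns history.
reply = falcon_bot.predict(
    "Le dialogue suivant est une conversation entre Emmanuel Macron et Elon Musk:",
    "[Emmanuel Macron]: Bonjour Monsieur Musk. Je vous remercie de me recevoir aujourd'hui.",
)
print(reply)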
@@ -87,17 +62,10 @@ falcon_bot = FalconChatBot()
 # Define the Gradio interface
 title = "👋🏻Bienvenue à Tonic's 🌜🌚Claire Chat !"
 description = "Vous pouvez utiliser [🌜🌚ClaireGPT](https://huggingface.co/OpenLLM-France/Claire-7B-0.1) Ou dupliquer pour l'utiliser localement ou sur huggingface! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
-history = [
-    {
-        "user": "",
-        "assistant": ""
-    },
-]
 examples = [
     [
         "Le dialogue suivant est une conversation entre Emmanuel Macron et Elon Musk:",  # user_message
         "[Emmanuel Macron]: Bonjour Monsieur Musk. Je vous remercie de me recevoir aujourd'hui.",  # assistant_message
-        # history, # history
         150,  # max_new_tokens
         0.9,  # temperature
         0.90,  # top_p
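The gr.Interface call itself falls outside the diff. A sketch of how these pieces are presumably wired together follows; the widget choices are assumptions. Note that Gradio feeds example rows positionally, so the input order below (taken from the comments in examples) should be checked against predict, whose signature declares temperature before max_new_tokens.

import gradio as gr

# Sketch only: the actual Interface call is not part of this commit.
iface = gr.Interface(
    fn=falcon_bot.predict,
    inputs=[
        gr.Textbox(label="user_message"),
        gr.Textbox(label="assistant_message"),
        gr.Slider(1, 1000, value=700, step=1, label="max_new_tokens"),
        gr.Slider(0.1, 2.0, value=0.4, label="temperature"),
        gr.Slider(0.0, 1.0, value=0.99, label="top_p"),
    ],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)
iface.launch()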