Update app.py
app.py
CHANGED
@@ -24,35 +24,15 @@ model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
     torch_dtype=torch.bfloat16,
     load_in_4bit=True  # For efficient inference, if supported by the GPU card
 )
-model =
+model = BetterTransformer.transform(model)
+
 
 # Class to encapsulate the Falcon chatbot
 class FalconChatBot:
     def __init__(self, system_prompt="Le dialogue suivant est une conversation"):
         self.system_prompt = system_prompt
 
-    def process_history(self, history):
-        if history is None:
-            return []
-
-        # Ensure that history is a list of dictionaries
-        if not isinstance(history, list):
-            return []
-
-        # Filter out special commands from the history
-        filtered_history = []
-        for message in history:
-            if isinstance(message, dict):
-                user_message = message.get("user", "")
-                assistant_message = message.get("assistant", "")
-                # Check if the user_message is not a special command
-                if not user_message.startswith("Protagoniste:"):
-                    filtered_history.append({"user": user_message, "assistant": assistant_message})
-        return filtered_history
-
-    def predict(self, user_message, assistant_message, history, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
-        # Process the history to remove special commands
-        processed_history = self.process_history(history)
+    def predict(self, user_message, assistant_message, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
         # Combine the user and assistant messages into a conversation
         conversation = f"{self.system_prompt} {assistant_message if assistant_message else ''} {user_message} "
         # Encode the conversation using the tokenizer
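For context, the setup this hunk completes presumably looks like the sketch below. The BetterTransformer import (from Hugging Face's optimum package) and the exact model_name value are not shown in the diff, so both are assumptions here; model_name is guessed from the Space description.

# Sketch of the surrounding setup; the optimum import and model_name are assumed.
import torch
import transformers
from optimum.bettertransformer import BetterTransformer

model_name = "OpenLLM-France/Claire-7B-0.1"  # assumed from the Space description

tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    load_in_4bit=True  # requires the bitsandbytes package and a supported GPU
)
# The fix this hunk lands: the dangling `model =` becomes a BetterTransformer
# wrap, which swaps in fused attention kernels for faster inference.
model = BetterTransformer.transform(model)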
@@ -61,7 +41,6 @@ class FalconChatBot:
         # Generate a response using the Falcon model
         response = model.generate(
             input_ids=input_ids,
-            # max_length=max_length,
             use_cache=False,
             early_stopping=False,
             bos_token_id=model.config.bos_token_id,
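The body of the encoding step (old lines 59-60, new 39-40) sits between this hunk and the previous one and is not shown in the diff; a common pattern, offered purely as an assumption, is:

# Assumed shape of the elided encoding step (not visible in this diff):
input_ids = tokenizer(conversation, return_tensors="pt").input_ids.to(model.device)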
@@ -72,14 +51,10 @@ class FalconChatBot:
             max_new_tokens=max_new_tokens,
             top_p=top_p,
             repetition_penalty=repetition_penalty
-        )
-
+        )
         # Decode the generated response to text
         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
-
-        updated_history = processed_history + [{"user": user_message, "assistant": response_text}]
-        return response_text, updated_history
-
+        return response_text
 
 # Create the Falcon chatbot instance
 falcon_bot = FalconChatBot()
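Since predict now returns a plain string rather than a (response_text, updated_history) tuple, callers simplify accordingly. A hypothetical call using the first example row:

# Hypothetical usage after this change; predict no longer takes or returns history.
reply = falcon_bot.predict(
    "Le dialogue suivant est une conversation entre Emmanuel Macron et Elon Musk:",
    "[Emmanuel Macron]: Bonjour Monsieur Musk. Je vous remercie de me recevoir aujourd'hui.",
)
print(reply)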
@@ -87,17 +62,10 @@ falcon_bot = FalconChatBot()
 # Define the Gradio interface
 title = "👋🏻Bienvenue à Tonic's 🌜🌚Claire Chat !"
 description = "Vous pouvez utiliser [🌜🌚ClaireGPT](https://huggingface.co/OpenLLM-France/Claire-7B-0.1) Ou dupliquer pour l'utiliser localement ou sur huggingface! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
-history = [
-    {
-        "user": "",
-        "assistant": ""
-    },
-]
 examples = [
     [
         "Le dialogue suivant est une conversation entre Emmanuel Macron et Elon Musk:",  # user_message
         "[Emmanuel Macron]: Bonjour Monsieur Musk. Je vous remercie de me recevoir aujourd'hui.",  # assistant_message
-        # history, # history
         150,  # max_new_tokens
         0.9,  # temperature
         0.90,  # top_p
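The gr.Interface call itself falls outside the diff. A sketch of how these pieces are presumably wired together follows; the widget choices are assumptions. Note that Gradio feeds example rows positionally, so the input order below (taken from the comments in examples) should be checked against predict, whose signature declares temperature before max_new_tokens.

import gradio as gr

# Sketch only: the actual Interface call is not part of this commit.
iface = gr.Interface(
    fn=falcon_bot.predict,
    inputs=[
        gr.Textbox(label="user_message"),
        gr.Textbox(label="assistant_message"),
        gr.Slider(1, 1000, value=700, step=1, label="max_new_tokens"),
        gr.Slider(0.1, 2.0, value=0.4, label="temperature"),
        gr.Slider(0.0, 1.0, value=0.99, label="top_p"),
    ],
    outputs="text",
    title=title,
    description=description,
    examples=examples,
)
iface.launch()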