Update app.py
app.py CHANGED
@@ -17,12 +17,12 @@ from transformers import (
 )


-model_name = "mosaicml/mpt-7b-chat"
+model_name = "timdettmers/guanaco-33b-merged"
 max_new_tokens = 1536

 # # small testing model:
-# model_name = "gpt2"
-# max_new_tokens = 128
+model_name = "gpt2"
+max_new_tokens = 128

 auth_token = os.getenv("HF_TOKEN", None)

@@ -30,24 +30,20 @@ print(f"Starting to load the model {model_name} into memory")

 m = AutoModelForCausalLM.from_pretrained(
     model_name,
+    load_in_8bit=True,
     torch_dtype=torch.bfloat16,
-    trust_remote_code=True,
-    use_auth_token=auth_token,
-)
-tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_auth_token=auth_token)
+    device_map="auto"
+)
+tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
+tok.bos_token_id = 1

-stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
+# stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])

 print(f"Successfully loaded the model {model_name} into memory")


-start_message = """<|im_start|>system
-- You are a helpful assistant chatbot trained by MosaicML.
-- You answer questions.
-- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
-- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
-"""
+start_message = """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."""
+prompt = f"{description} ### Human: {user_query} ### Assistant:"


 class StopOnTokens(StoppingCriteria):
@@ -177,13 +173,7 @@ with gr.Blocks(
 ) as demo:
     conversation_id = gr.State(get_uuid)
     gr.Markdown(
-        """<h1><center>
-
-        This demo is of [MPT-7B-Chat](https://huggingface.co/mosaicml/mpt-7b-chat). It is based on [MPT-7B](https://huggingface.co/mosaicml/mpt-7b) fine-tuned with approximately [171,000 conversation samples from this dataset](https://huggingface.co/datasets/sam-mosaic/vicuna_alpaca_hc3_chatml) and another [217,000 from this dataset](https://huggingface.co/datasets/sam-mosaic/hhrlhf_evol_chatml).
-
-        If you're interested in [training](https://www.mosaicml.com/training) and [deploying](https://www.mosaicml.com/inference) your own MPT or LLMs, [sign up](https://forms.mosaicml.com/demo?utm_source=huggingface&utm_medium=referral&utm_campaign=mpt-7b) for MosaicML platform.
-
-        This is running on a smaller, shared GPU, so it may take a few seconds to respond. If you want to run it on your own GPU, you can [download the model from HuggingFace](https://huggingface.co/mosaicml/mpt-7b-chat) and run it locally. Or [Duplicate the Space](https://huggingface.co/spaces/mosaicml/mpt-7b-chat?duplicate=true) to skip the queue and run in a private space.
+        """<h1><center>Guanaco-33b playground</center></h1>
         """
     )
     chatbot = gr.Chatbot().style(height=500)
@@ -251,8 +241,8 @@ with gr.Blocks(
         )
     with gr.Row():
         gr.Markdown(
-            "Disclaimer:
-            "factually accurate information.
+            "Disclaimer: The model can produce factually incorrect output, and should not be relied on to produce "
+            "factually accurate information. The model was trained on various public datasets; while great efforts "
             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
             "biased, or otherwise offensive outputs.",
             elem_classes=["disclaimer"],
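For reference, below is a minimal sketch of how the pieces introduced in this commit could fit together at inference time: loading the merged Guanaco weights in 8-bit, using the LLaMA tokenizer, and building the `### Human: ... ### Assistant:` prompt from the new `start_message`. The helper name `generate_response`, the sampling settings, the example question, and the choice to substitute `start_message` for the undefined `description` variable are assumptions for illustration, not part of the Space's code.

```python
# Sketch only: approximates how the updated app.py's model, tokenizer, and
# prompt format could be wired together; anything marked "assumption" below
# is not taken from the commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "timdettmers/guanaco-33b-merged"

m = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,           # as in the commit; requires bitsandbytes
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
tok.bos_token_id = 1

start_message = (
    "A chat between a curious human and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions."
)


def generate_response(user_query: str, max_new_tokens: int = 128) -> str:
    # Assumption: the committed f-string references `description`; here the
    # system text in `start_message` is used in its place.
    prompt = f"{start_message} ### Human: {user_query} ### Assistant:"
    inputs = tok(prompt, return_tensors="pt").to(m.device)
    with torch.no_grad():
        output_ids = m.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,          # illustrative sampling settings
            temperature=0.7,
            top_p=0.9,
        )
    # Decode only the tokens generated after the prompt.
    new_tokens = output_ids[0, inputs["input_ids"].shape[1]:]
    return tok.decode(new_tokens, skip_special_tokens=True)


print(generate_response("What is the capital of France?"))
```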