ybelkada committed
Commit 22b1ce0
1 Parent(s): 73b7f0f

Update app.py

Files changed (1)
  1. app.py +14 -24
app.py CHANGED
@@ -17,12 +17,12 @@ from transformers import (
 )
 
 
-model_name = "mosaicml/mpt-7b-chat"
+model_name = "timdettmers/guanaco-33b-merged"
 max_new_tokens = 1536
 
 # # small testing model:
-# model_name = "gpt2"
-# max_new_tokens = 128
+model_name = "gpt2"
+max_new_tokens = 128
 
 auth_token = os.getenv("HF_TOKEN", None)
 
@@ -30,24 +30,20 @@ print(f"Starting to load the model {model_name} into memory")
 
 m = AutoModelForCausalLM.from_pretrained(
     model_name,
+    load_in_8bit=True,
     torch_dtype=torch.bfloat16,
-    trust_remote_code=True,
-    use_auth_token=auth_token,
-    max_seq_len=8192,
-).cuda()
-tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_auth_token=auth_token)
+    device_map="auto"
+)
+tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
+tok.bos_token_id = 1
 
-stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
+# stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
 
 print(f"Successfully loaded the model {model_name} into memory")
 
 
-start_message = """<|im_start|>system
-- You are a helpful assistant chatbot trained by MosaicML.
-- You answer questions.
-- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
-- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
-"""
+start_message = """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."""
+prompt = f"{description} ### Human: {user_query} ### Assistant:"
 
 
 class StopOnTokens(StoppingCriteria):
@@ -177,13 +173,7 @@ with gr.Blocks(
 ) as demo:
     conversation_id = gr.State(get_uuid)
     gr.Markdown(
-        """<h1><center>MosaicML MPT-7B-Chat</center></h1>
-
-        This demo is of [MPT-7B-Chat](https://huggingface.co/mosaicml/mpt-7b-chat). It is based on [MPT-7B](https://huggingface.co/mosaicml/mpt-7b) fine-tuned with approximately [171,000 conversation samples from this dataset](https://huggingface.co/datasets/sam-mosaic/vicuna_alpaca_hc3_chatml) and another [217,000 from this dataset](https://huggingface.co/datasets/sam-mosaic/hhrlhf_evol_chatml).
-
-        If you're interested in [training](https://www.mosaicml.com/training) and [deploying](https://www.mosaicml.com/inference) your own MPT or LLMs, [sign up](https://forms.mosaicml.com/demo?utm_source=huggingface&utm_medium=referral&utm_campaign=mpt-7b) for MosaicML platform.
-
-        This is running on a smaller, shared GPU, so it may take a few seconds to respond. If you want to run it on your own GPU, you can [download the model from HuggingFace](https://huggingface.co/mosaicml/mpt-7b-chat) and run it locally. Or [Duplicate the Space](https://huggingface.co/spaces/mosaicml/mpt-7b-chat?duplicate=true) to skip the queue and run in a private space.
+        """<h1><center>Guanaco-33b playground</center></h1>
         """
     )
     chatbot = gr.Chatbot().style(height=500)
@@ -251,8 +241,8 @@ with gr.Blocks(
     )
     with gr.Row():
         gr.Markdown(
-            "Disclaimer: MPT-7B can produce factually incorrect output, and should not be relied on to produce "
-            "factually accurate information. MPT-7B was trained on various public datasets; while great efforts "
+            "Disclaimer: The model can produce factually incorrect output, and should not be relied on to produce "
+            "factually accurate information. The model was trained on various public datasets; while great efforts "
             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
             "biased, or otherwise offensive outputs.",
             elem_classes=["disclaimer"],
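
The core of this change is the loading path: the MPT-specific arguments (`trust_remote_code`, `max_seq_len`) and the whole-model `.cuda()` move are replaced by 8-bit quantization with automatic device placement. A minimal sketch of that path, assuming the `bitsandbytes` and `accelerate` packages are installed (both were required for `load_in_8bit=True` and `device_map="auto"` in the transformers releases of this period):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "timdettmers/guanaco-33b-merged"

# 8-bit weights take half the memory of bf16 ones, which is what makes a
# 33B-parameter model fit on a single large GPU; device_map="auto" lets
# accelerate place layers across whatever devices are available.
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    torch_dtype=torch.bfloat16,  # dtype used for the non-quantized modules
    device_map="auto",
)

# Guanaco is a LLaMA fine-tune, so the commit reuses a LLaMA tokenizer.
# The explicit override to LLaMA's actual BOS token id (1) is presumably a
# workaround for the misconfigured special-token ids in that conversion.
tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
tok.bos_token_id = 1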
 
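The prompt format changes too, from MPT's ChatML markup to the Vicuna-style `### Human: ... ### Assistant:` turns that Guanaco was fine-tuned on. As committed, the added `prompt = f"{description} ### Human: {user_query} ### Assistant:"` line references `description` and `user_query`, which are not defined at module scope, so it reads as a template for the turn format rather than code that runs at import time. A sketch of how that template is typically filled in at generation time, reusing `m` and `tok` from the sketch above (the query and sampling settings are illustrative, not taken from the commit):

# System preamble, matching the new start_message in the diff.
start_message = (
    "A chat between a curious human and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions."
)
user_query = "Explain 8-bit quantization in one paragraph."  # illustrative input

# One human turn plus an open assistant turn for the model to complete.
prompt = f"{start_message} ### Human: {user_query} ### Assistant:"

inputs = tok(prompt, return_tensors="pt").to(m.device)
output_ids = m.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)

# Decode only the newly generated continuation, not the echoed prompt.
reply = tok.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
print(reply)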
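Finally, note that the ChatML `stop_token_ids` line is commented out while the `StopOnTokens(StoppingCriteria)` class is kept. Under the new format the natural stop sequence is the next `### Human:` turn marker. A sketch of what such a criterion generally looks like; the actual implementation in app.py may differ, and the stop string here is an assumption:

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    """Stop generating once the output ends in any of the stop sequences."""

    def __init__(self, stop_ids_list):
        self.stop_ids_list = stop_ids_list

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in self.stop_ids_list:
            if input_ids[0, -len(stop_ids):].tolist() == stop_ids:
                return True
        return False

# `tok` is the tokenizer from the loading sketch above; the stop string is
# the assumed start of the next human turn in the Vicuna-style format.
stop_ids_list = [tok.encode("### Human:", add_special_tokens=False)]
stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids_list)])
# Then pass it to generation: m.generate(..., stopping_criteria=stopping_criteria)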