Update app.py
app.py CHANGED
@@ -17,12 +17,12 @@ from transformers import (
 )


-model_name = "mosaicml/mpt-7b-chat"
+model_name = "timdettmers/guanaco-33b-merged"
 max_new_tokens = 1536

 # # small testing model:
-# model_name = "gpt2"
-# max_new_tokens = 128
+model_name = "gpt2"
+max_new_tokens = 128

 auth_token = os.getenv("HF_TOKEN", None)

@@ -30,24 +30,20 @@ print(f"Starting to load the model {model_name} into memory")

 m = AutoModelForCausalLM.from_pretrained(
     model_name,
+    load_in_8bit=True,
     torch_dtype=torch.bfloat16,
-    trust_remote_code=True,
-    use_auth_token=auth_token,
-)
-tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_auth_token=auth_token)
+    device_map="auto"
+)
+tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
+tok.bos_token_id = 1

-stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
+# stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])

 print(f"Successfully loaded the model {model_name} into memory")


-start_message = """<|im_start|>system
-- You are a helpful assistant chatbot trained by MosaicML.
-- You answer questions.
-- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
-- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
-"""
+start_message = """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."""
+prompt = f"{description} ### Human: {user_query} ### Assistant:"


 class StopOnTokens(StoppingCriteria):
@@ -177,13 +173,7 @@ with gr.Blocks(
 ) as demo:
     conversation_id = gr.State(get_uuid)
     gr.Markdown(
-        """<h1><center>
-
-        This demo is of [MPT-7B-Chat](https://huggingface.co/mosaicml/mpt-7b-chat). It is based on [MPT-7B](https://huggingface.co/mosaicml/mpt-7b) fine-tuned with approximately [171,000 conversation samples from this dataset](https://huggingface.co/datasets/sam-mosaic/vicuna_alpaca_hc3_chatml) and another [217,000 from this dataset](https://huggingface.co/datasets/sam-mosaic/hhrlhf_evol_chatml).
-
-        If you're interested in [training](https://www.mosaicml.com/training) and [deploying](https://www.mosaicml.com/inference) your own MPT or LLMs, [sign up](https://forms.mosaicml.com/demo?utm_source=huggingface&utm_medium=referral&utm_campaign=mpt-7b) for MosaicML platform.
-
-        This is running on a smaller, shared GPU, so it may take a few seconds to respond. If you want to run it on your own GPU, you can [download the model from HuggingFace](https://huggingface.co/mosaicml/mpt-7b-chat) and run it locally. Or [Duplicate the Space](https://huggingface.co/spaces/mosaicml/mpt-7b-chat?duplicate=true) to skip the queue and run in a private space.
+        """<h1><center>Guanaco-33b playground</center></h1>
         """
     )
     chatbot = gr.Chatbot().style(height=500)
@@ -251,8 +241,8 @@ with gr.Blocks(
         )
     with gr.Row():
         gr.Markdown(
-            "Disclaimer:
-            "factually accurate information.
+            "Disclaimer: The model can produce factually incorrect output, and should not be relied on to produce "
+            "factually accurate information. The model was trained on various public datasets; while great efforts "
             "have been taken to clean the pretraining data, it is possible that this model could generate lewd, "
             "biased, or otherwise offensive outputs.",
             elem_classes=["disclaimer"],
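For reference, below is a minimal sketch of how the pieces introduced in this commit could fit together at inference time: loading the merged Guanaco weights in 8-bit, using the LLaMA tokenizer, and building the `### Human: ... ### Assistant:` prompt from the new `start_message`. The helper name `generate_response`, the sampling settings, the example question, and the choice to substitute `start_message` for the undefined `description` variable are assumptions for illustration, not part of the Space's code.

```python
# Sketch only: approximates how the updated app.py's model, tokenizer, and
# prompt format could be wired together; anything marked "assumption" below
# is not taken from the commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "timdettmers/guanaco-33b-merged"

m = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,           # as in the commit; requires bitsandbytes
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
tok.bos_token_id = 1

start_message = (
    "A chat between a curious human and an artificial intelligence assistant. "
    "The assistant gives helpful, detailed, and polite answers to the user's questions."
)


def generate_response(user_query: str, max_new_tokens: int = 128) -> str:
    # Assumption: the committed f-string references `description`; here the
    # system text in `start_message` is used in its place.
    prompt = f"{start_message} ### Human: {user_query} ### Assistant:"
    inputs = tok(prompt, return_tensors="pt").to(m.device)
    with torch.no_grad():
        output_ids = m.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,          # illustrative sampling settings
            temperature=0.7,
            top_p=0.9,
        )
    # Decode only the tokens generated after the prompt.
    new_tokens = output_ids[0, inputs["input_ids"].shape[1]:]
    return tok.decode(new_tokens, skip_special_tokens=True)


print(generate_response("What is the capital of France?"))
```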