guanaco-65b-4bit

Paused

App Files Files Community

ybelkada committed on May 23, 2023

Commit

34efb62

•

1 Parent(s): d69ff54

working v

Browse files

Files changed (1) hide show

app.py +11 -14

app.py CHANGED Viewed

@@ -11,40 +11,37 @@ import torch
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     StoppingCriteria,
     StoppingCriteriaList,
     TextIteratorStreamer,
 )
 model_name = "timdettmers/guanaco-33b-merged"
 max_new_tokens = 1536
-# # small testing model:
-model_name = "gpt2"
-max_new_tokens = 128
 auth_token = os.getenv("HF_TOKEN", None)
 print(f"Starting to load the model {model_name} into memory")
 m = AutoModelForCausalLM.from_pretrained(
     model_name,
-    # load_in_8bit=True,
     torch_dtype=torch.bfloat16,
-    # device_map="auto"
 )
-# tok = AutoTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
-tok = AutoTokenizer.from_pretrained(model_name)
 tok.bos_token_id = 1
-# stop_token_ids = tok.convert_tokens_to_ids(["<|im_end|>", "<|endoftext|>"])
 print(f"Successfully loaded the model {model_name} into memory")
 start_message = """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."""
-prompt = f"{start_message} ### Human: {user_query} ### Assistant:"
 class StopOnTokens(StoppingCriteria):
@@ -60,8 +57,8 @@ def convert_history_to_text(history):
         [
             "".join(
                 [
-                    f"<|im_start|>user\n{item[0]}<|im_end|>",
-                    f"<|im_start|>assistant\n{item[1]}<|im_end|>",
                 ]
             )
             for item in history[:-1]
@@ -71,8 +68,8 @@ def convert_history_to_text(history):
         [
             "".join(
                 [
-                    f"<|im_start|>user\n{history[-1][0]}<|im_end|>",
-                    f"<|im_start|>assistant\n{history[-1][1]}",
                 ]
             )
         ]

 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
+    LlamaTokenizer,
     StoppingCriteria,
     StoppingCriteriaList,
     TextIteratorStreamer,
 )
+# model_name = "lmsys/vicuna-7b-delta-v1.1"
 model_name = "timdettmers/guanaco-33b-merged"
 max_new_tokens = 1536
 auth_token = os.getenv("HF_TOKEN", None)
 print(f"Starting to load the model {model_name} into memory")
 m = AutoModelForCausalLM.from_pretrained(
     model_name,
+    load_in_8bit=True,
     torch_dtype=torch.bfloat16,
+    device_map={"": 0}
 )
+tok = LlamaTokenizer.from_pretrained(model_name)
 tok.bos_token_id = 1
+stop_token_ids = [0]
 print(f"Successfully loaded the model {model_name} into memory")
 start_message = """A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions."""
 class StopOnTokens(StoppingCriteria):
         [
             "".join(
                 [
+                    f"### Human: {item[0]}\n",
+                    f"### Assistant: {item[1]}\n",
                 ]
             )
             for item in history[:-1]
         [
             "".join(
                 [
+                    f"### Human: {history[-1][0]}\n",
+                    f"### Assistant: {history[-1][1]}\n",
                 ]
             )
         ]