timdettmers committed on
Commit
cfde7ef
1 Parent(s): c0f8c05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -4,6 +4,7 @@ import datetime
4
  import os
5
  from threading import Event, Thread
6
  from uuid import uuid4
 
7
 
8
  import gradio as gr
9
  import requests
@@ -19,7 +20,8 @@ from transformers import (
19
 
20
 
21
  # model_name = "lmsys/vicuna-7b-delta-v1.1"
22
- model_name = "timdettmers/guanaco-33b-merged"
 
23
  max_new_tokens = 1536
24
 
25
  auth_token = os.getenv("HF_TOKEN", None)
@@ -28,12 +30,18 @@ print(f"Starting to load the model {model_name} into memory")
28
 
29
  m = AutoModelForCausalLM.from_pretrained(
30
  model_name,
31
- load_in_8bit=True,
 
 
 
 
 
32
  torch_dtype=torch.bfloat16,
33
  device_map={"": 0}
34
  )
 
35
  m.eval()
36
- tok = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
37
  tok.bos_token_id = 1
38
 
39
  stop_token_ids = [0]
@@ -172,7 +180,7 @@ with gr.Blocks(
172
  ) as demo:
173
  conversation_id = gr.State(get_uuid)
174
  gr.Markdown(
175
- """<h1><center>Guanaco-33b playground</center></h1>
176
  """
177
  )
178
  chatbot = gr.Chatbot().style(height=500)
 
4
  import os
5
  from threading import Event, Thread
6
  from uuid import uuid4
7
+ from peft import PeftModel
8
 
9
  import gradio as gr
10
  import requests
 
20
 
21
 
22
  # model_name = "lmsys/vicuna-7b-delta-v1.1"
23
+ model_name = "decapoda-research/llama-65b-hf"
24
+
25
  max_new_tokens = 1536
26
 
27
  auth_token = os.getenv("HF_TOKEN", None)
 
30
 
31
  m = AutoModelForCausalLM.from_pretrained(
32
  model_name,
33
+ quantization_config=transformers.BitsAndBytesConfig(
34
+ load_in_4bit=True,
35
+ bnb_4bit_compute_dtype=torch.bfloat16,
36
+ bnb_4bit_use_double_quant=True,
37
+ bnb_4bit_quant_type='nf4' # {'fp4', 'nf4'}
38
+ ),
39
  torch_dtype=torch.bfloat16,
40
  device_map={"": 0}
41
  )
42
+ m = PeftModel.from_pretrained(m, 'timdettmers/guanaco-65b')
43
  m.eval()
44
+ tok = LlamaTokenizer.from_pretrained("decapoda-research/llama-65b-hf")
45
  tok.bos_token_id = 1
46
 
47
  stop_token_ids = [0]
 
180
  ) as demo:
181
  conversation_id = gr.State(get_uuid)
182
  gr.Markdown(
183
+ """<h1><center>Guanaco-65b playground</center></h1>
184
  """
185
  )
186
  chatbot = gr.Chatbot().style(height=500)