Chinese-Llama-2-7b

Runtime error

shiyemin2 committed on Jul 22, 2023

Commit

042afcb

1 Parent(s): 2a79bc9

Update model.py

Files changed (1) hide show

model.py CHANGED Viewed

@@ -4,14 +4,25 @@ from typing import Iterator
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-model_id = 'LinkSoul/Chinese-Llama-2-7b'
 if torch.cuda.is_available():
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        torch_dtype=torch.float16,
-        device_map='auto'
-    )
 else:
     model = None
 tokenizer = AutoTokenizer.from_pretrained(model_id)

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+# Original version
+# model_id = "LinkSoul/Chinese-Llama-2-7b"
+# 4 bit version
+model_id = "LinkSoul/Chinese-Llama-2-7b-4bit"
 if torch.cuda.is_available():
+    if model_id.endswith("4bit"):
+        model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                load_in_4bit=True,
+                local_files_only=True,
+                torch_dtype=torch.float16
+            )
+    else:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            torch_dtype=torch.float16,
+            device_map='auto'
+        )
 else:
     model = None
 tokenizer = AutoTokenizer.from_pretrained(model_id)