Update app.py
app.py CHANGED

```diff
@@ -33,9 +33,11 @@ if not torch.cuda.is_available():
 if torch.cuda.is_available():
     # model_id = "meta-llama/Llama-2-7b-chat-hf"
     model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", torch_dtype=torch.bfloat16)
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     tokenizer.use_default_system_prompt = False
+    print(f"{model}")
+    print(f"{model.device}")
 
 
 @spaces.GPU
@@ -50,8 +52,8 @@ def generate(
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
-    if system_prompt:
-        conversation.append({"role": "system", "content": system_prompt})
+    # if system_prompt:
+    #     conversation.append({"role": "system", "content": system_prompt})
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
```