5CD-AI/visocial-Sailor-4B-Instruct
app.py
CHANGED
@@ -3,9 +3,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import json
 import os
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.messages import UserMessage
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
 
 # model_name = "meta-llama/Meta-Llama-3-8B-Instruct" # uses more than 16GB
 # model_name = "meta-llama/Llama-2-7b-chat-hf" # only part of paid model
@@ -13,7 +10,10 @@ from mistral_common.protocol.instruct.request import ChatCompletionRequest
 model_name = "Qwen/Qwen2-7B-Instruct"
 # google/gemma-2-9b-it # 18GB
 # meta-llama/Meta-Llama-3-8B # 16GB
-# Qwen/Qwen2-7B-Instruct # 15GB
+# Qwen/Qwen2-7B-Instruct # 15GB # wouldn't run due to Memory Limit Exceeded
+
+# TODO try the following models:
+# mistralai/Mistral-7B-Instruct-v0.3
 
 
 
@@ -29,9 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
 # model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
 model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
 
-
-
-model.resize_token_embeddings(len(tokenizer))
+tokenizer = AutoTokenizer.from_pretrained("5CD-AI/visocial-Sailor-4B-Instruct")
+model = AutoModelForCausalLM.from_pretrained("5CD-AI/visocial-Sailor-4B-Instruct")
 
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
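The commented-out `quantization_config` load and the "Memory Limit Exceeded" note both point at quantization as the usual way to squeeze a 7B model under the 16GB budget the comments mention. A minimal sketch of 4-bit loading with the `BitsAndBytesConfig` that app.py already imports; the exact settings (fp16 compute, `device_map="auto"`) are illustrative assumptions, not what this commit ships:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "Qwen/Qwen2-7B-Instruct"

# Quantize the weights to 4 bits; matmuls still run in fp16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate place layers on the available device
)
```

At 4-bit precision the weights of a 7B model occupy roughly 4GB instead of the ~15GB noted in the comments, so this is worth retrying before switching checkpoints.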
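The diff cuts off inside `generate_response` right after the raw `tokenizer(...)` call, and the two new `from_pretrained` lines rebind `tokenizer` and `model`, so the Sailor pair silently replaces the Qwen pair loaded just above. A hedged sketch of how the function might finish for an instruct checkpoint like `5CD-AI/visocial-Sailor-4B-Instruct`, using the standard transformers chat-template API; the greedy decoding and the 256-token cap are assumptions:

```python
def generate_response(prompt):
    # Wrap the raw prompt in the chat format the instruct model was tuned on.
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the assistant-turn marker
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(input_ids, max_new_tokens=256, do_sample=False)
    # Decode only the newly generated tokens, not the echoed prompt.
    return tokenizer.decode(output_ids[0, input_ids.shape[1]:], skip_special_tokens=True)
```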
|