model change
- app.py +11 -2
- requirements.txt +2 -1
app.py
CHANGED
@@ -3,11 +3,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import json
 import os
+from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+from mistral_common.protocol.instruct.messages import UserMessage
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
 
 # model_name = "meta-llama/Meta-Llama-3-8B-Instruct" # uses more than 16GB
 # model_name = "meta-llama/Llama-2-7b-chat-hf" # only part of paid model
 model_name = "mistralai/Mistral-7B-Instruct-v0.1" # try this with quantisation to reduce memory usage
-
+model = "mistral"
 
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
@@ -18,8 +21,14 @@ quantization_config = BitsAndBytesConfig(
 token = os.getenv("HF_TOKEN")
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
 # model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
-model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
+# model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
+model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+
 
+mistral_models_path = "MISTRAL_MODELS_PATH"
+tokenizer = MistralTokenizer.v1()
+completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])
+tokens = tokenizer.encode_chat_completion(completion_request).tokens
 
 
 def generate_response(prompt):
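The net effect in app.py is to drop the 4-bit BitsAndBytesConfig path in favour of the load_in_8bit=True shortcut. On transformers 4.42 that keyword is a legacy convenience that gets wrapped into a BitsAndBytesConfig internally, so an explicit equivalent looks like the sketch below (an illustration only, assuming bitsandbytes is installed and a CUDA device is available; the token argument is optional and only matters for gated checkpoints):

import os
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
token = os.getenv("HF_TOKEN")

# Spell the 8-bit load out as a config object; load_in_8bit=True in
# from_pretrained builds the same config behind the scenes.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate place layers on the available GPU(s)
    token=token,
)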
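The new mistral_common block encodes a ChatCompletionRequest, but the resulting tokens are never passed to the model. A sketch of the full round trip, assuming the model object loaded above (max_new_tokens=256 is an arbitrary choice; decode here is mistral_common's detokenizer, not the Hugging Face tokenizer):

import torch
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

mistral_tokenizer = MistralTokenizer.v1()  # v1 matches Mistral-7B-Instruct-v0.1

completion_request = ChatCompletionRequest(
    messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")]
)
tokens = mistral_tokenizer.encode_chat_completion(completion_request).tokens

# generate() expects a batched tensor on the model's device
input_ids = torch.tensor([tokens], device=model.device)
output_ids = model.generate(input_ids, max_new_tokens=256)

# decode only the tokens produced after the prompt
reply = mistral_tokenizer.decode(output_ids[0][len(tokens):].tolist())
print(reply)

Binding the tokenizer to a distinct name (mistral_tokenizer) also sidesteps the diff's rebinding of tokenizer, which otherwise silently replaces the AutoTokenizer instance created a few lines earlier.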
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
 transformers==4.42.3
 torch==2.3.1
-bitsandbytes
+bitsandbytes
+mistral_common
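The two new dependencies are added unpinned while transformers and torch stay pinned. If the Space needs reproducible builds, pinning them as well would look like this (the version numbers below are illustrative assumptions from the same period, not taken from the commit):

transformers==4.42.3
torch==2.3.1
bitsandbytes==0.43.1
mistral_common==1.3.1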