model change
- app.py +11 -2
- requirements.txt +2 -1
app.py
CHANGED
@@ -3,11 +3,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import json
 import os
+from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+from mistral_common.protocol.instruct.messages import UserMessage
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
 
 # model_name = "meta-llama/Meta-Llama-3-8B-Instruct" # uses more than 16GB
 # model_name = "meta-llama/Llama-2-7b-chat-hf" # only part of paid model
 model_name = "mistralai/Mistral-7B-Instruct-v0.1" # try this with quantisation to reduce memory usage
-
+model = "mistral"
 
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
@@ -18,8 +21,14 @@ quantization_config = BitsAndBytesConfig(
 token = os.getenv("HF_TOKEN")
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
 # model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
-model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
+# model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
+model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
+
 
+mistral_models_path = "MISTRAL_MODELS_PATH"
+tokenizer = MistralTokenizer.v1()
+completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])
+tokens = tokenizer.encode_chat_completion(completion_request).tokens
 
 
 def generate_response(prompt):
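The net effect in app.py is to drop the 4-bit BitsAndBytesConfig path in favour of the load_in_8bit=True shortcut. On transformers 4.42 that keyword is a legacy convenience that gets wrapped into a BitsAndBytesConfig internally, so an explicit equivalent looks like the sketch below (an illustration only, assuming bitsandbytes is installed and a CUDA device is available; the token argument is optional and only matters for gated checkpoints):

import os
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
token = os.getenv("HF_TOKEN")

# Spell the 8-bit load out as a config object; load_in_8bit=True in
# from_pretrained builds the same config behind the scenes.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate place layers on the available GPU(s)
    token=token,
)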
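The new mistral_common block encodes a ChatCompletionRequest, but the resulting tokens are never passed to the model. A sketch of the full round trip, assuming the model object loaded above (max_new_tokens=256 is an arbitrary choice; decode here is mistral_common's detokenizer, not the Hugging Face tokenizer):

import torch
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest

mistral_tokenizer = MistralTokenizer.v1()  # v1 matches Mistral-7B-Instruct-v0.1

completion_request = ChatCompletionRequest(
    messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")]
)
tokens = mistral_tokenizer.encode_chat_completion(completion_request).tokens

# generate() expects a batched tensor on the model's device
input_ids = torch.tensor([tokens], device=model.device)
output_ids = model.generate(input_ids, max_new_tokens=256)

# decode only the tokens produced after the prompt
reply = mistral_tokenizer.decode(output_ids[0][len(tokens):].tolist())
print(reply)

Binding the tokenizer to a distinct name (mistral_tokenizer) also sidesteps the diff's rebinding of tokenizer, which otherwise silently replaces the AutoTokenizer instance created a few lines earlier.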
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
 transformers==4.42.3
 torch==2.3.1
-bitsandbytes
+bitsandbytes
+mistral_common
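The two new dependencies are added unpinned while transformers and torch stay pinned. If the Space needs reproducible builds, pinning them as well would look like this (the version numbers below are illustrative assumptions from the same period, not taken from the commit):

transformers==4.42.3
torch==2.3.1
bitsandbytes==0.43.1
mistral_common==1.3.1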