Anson69 committed
Commit 1602143
1 Parent(s): 007c69f

5CD-AI/visocial-Sailor-4B-Instruct

Files changed (1)
app.py +6 -7
app.py CHANGED
@@ -3,9 +3,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import json
 import os
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.messages import UserMessage
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
 
 # model_name = "meta-llama/Meta-Llama-3-8B-Instruct" # uses more than 16GB
 # model_name = "meta-llama/Llama-2-7b-chat-hf" # only part of paid model
@@ -13,7 +10,10 @@ from mistral_common.protocol.instruct.request import ChatCompletionRequest
 model_name = "Qwen/Qwen2-7B-Instruct"
 # google/gemma-2-9b-it # 18GB
 # meta-llama/Meta-Llama-3-8B # 16GB
-# Qwen/Qwen2-7B-Instruct # 15GB
+# Qwen/Qwen2-7B-Instruct # 15GB # wouldn't run due to Memory Limit Exceeded
+
+# TODO try the following models:
+# mistralai/Mistral-7B-Instruct-v0.3
 
 
 
@@ -29,9 +29,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
 # model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
 model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
 
-special_tokens_dict = {'pad_token': tokenizer.eos_token}
-tokenizer.add_special_tokens(special_tokens_dict)
-model.resize_token_embeddings(len(tokenizer))
+tokenizer = AutoTokenizer.from_pretrained("5CD-AI/visocial-Sailor-4B-Instruct")
+model = AutoModelForCausalLM.from_pretrained("5CD-AI/visocial-Sailor-4B-Instruct")
 
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
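
For context, a minimal sketch of the model-loading and generation path app.py ends up with after this commit. Only the lines visible in the diff (the 5CD-AI/visocial-Sailor-4B-Instruct from_pretrained calls and the first line of generate_response) are confirmed by the source; the rest of the function body, the max_new_tokens value, and the decoding step are assumptions added for illustration.

# Sketch of app.py after this commit; generation settings are assumptions,
# not taken from the diff.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "5CD-AI/visocial-Sailor-4B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

def generate_response(prompt):
    # Tokenize the prompt, as in the diff.
    inputs = tokenizer(prompt, return_tensors="pt")
    # The remainder is assumed: run generation and decode the new tokens.
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(generate_response("Xin chào!"))

Note that the commit leaves the earlier tokenizer/model loads for model_name in place and simply overwrites both variables with the 4B model, which is what sidesteps the memory limit mentioned in the added comment.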