Anson69 committed on
Commit
2508495
1 Parent(s): 1602143

Qwen/Qwen2-1.5B-Instruct

Files changed (1)
  1. app.py +2 -4
app.py CHANGED
@@ -7,13 +7,14 @@ import os
 # model_name = "meta-llama/Meta-Llama-3-8B-Instruct" # uses more than 16GB
 # model_name = "meta-llama/Llama-2-7b-chat-hf" # only part of paid model
 # model_name = "mistralai/Mistral-7B-Instruct-v0.1" # try this with quantisation to reduce memory usage; needs a GPU to run
-model_name = "Qwen/Qwen2-7B-Instruct"
+model_name = "Qwen/Qwen2-1.5B-Instruct"
 # google/gemma-2-9b-it # 18GB
 # meta-llama/Meta-Llama-3-8B # 16GB
 # Qwen/Qwen2-7B-Instruct # 15GB # wouldn't run due to Memory Limit Exceeded

 # TODO try the following models:
 # mistralai/Mistral-7B-Instruct-v0.3
+# google/flan-t5



@@ -29,9 +30,6 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
 # model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
 model = AutoModelForCausalLM.from_pretrained(model_name, token=token)

-tokenizer = AutoTokenizer.from_pretrained("5CD-AI/visocial-Sailor-4B-Instruct")
-model = AutoModelForCausalLM.from_pretrained("5CD-AI/visocial-Sailor-4B-Instruct")
-
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt")
     with torch.no_grad():
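The diff cuts off inside generate_response. For orientation, here is a minimal sketch of how the updated app.py plausibly fits together after this commit, assuming the access token is read from the Space's environment and that generation follows the standard transformers pattern; the HF_TOKEN variable name, the max_new_tokens value, and the return statement are assumptions, not part of the commit.

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed: the Space reads its Hugging Face access token from an environment secret.
token = os.environ.get("HF_TOKEN")

model_name = "Qwen/Qwen2-1.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
# model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=quantization_config)
model = AutoModelForCausalLM.from_pretrained(model_name, token=token)

def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        # max_new_tokens is an illustrative value; the commit does not show the generation call
        outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)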