Tonic committed on
Commit
b2dee6e
1 Parent(s): 6a4c80e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -1,23 +1,22 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer
2
  from tokenization_yi import YiTokenizer
3
  import torch
4
  import os
5
  import gradio as gr
6
  import sentencepiece
7
 
8
- model_id = "01-ai/Yi-34B-200K"
9
-
10
- os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
11
- device = "cuda" if torch.cuda.is_available() else "cpu"
12
- # offload_directory = './model_offload'
13
- # if not os.path.exists(offload_directory):
14
- # os.makedirs(offload_directory)
15
- # model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True, load_in_8bit_fp32_cpu_offload=True, offload_folder=offload_directory, trust_remote_code=True)
16
- # model = model.to(device)
17
-
18
- tokenizer = YiTokenizer(vocab_file="./tokenizer.model")
19
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_8bit=True, trust_remote_code=True)
20
-
21
  def run(message, chat_history, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
22
  prompt = get_prompt(message, chat_history)
23
  input_ids = tokenizer.encode(prompt, return_tensors='pt')
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GPTQConfig
2
  from tokenization_yi import YiTokenizer
3
  import torch
4
  import os
5
  import gradio as gr
6
  import sentencepiece
7
 
8
+ model_id = "TheBloke/Yi-34B-200K-Llamafied-GPTQ"
9
+
10
+ gptq_config = GPTQConfig(
11
+ bits=4,
12
+ exllama_config={"version": 2}
13
+ )
14
+ tokenizer = AutoTokenizer.from_pretrained("TheBloke/Yi-34B-200K-Llamafied-GPTQ")
15
+ model = AutoModelForCausalLM.from_pretrained(
16
+ model_id,
17
+ device_map="auto",
18
+ quantization_config=gptq_config
19
+ )
 
20
  def run(message, chat_history, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
21
  prompt = get_prompt(message, chat_history)
22
  input_ids = tokenizer.encode(prompt, return_tensors='pt')