Tonic committed on
Commit
1b11cae
1 Parent(s): e63384d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
3
  import os
4
  import gradio as gr
5
  import sentencepiece
6
- from tokenization_yi import YiTokenizer
7
 
8
 
9
  from transformers import AutoModelForCausalLM, GPTQConfig, AutoTokenizer, AutoModelForCausalLM
@@ -16,9 +16,13 @@ from tokenization_yi import YiTokenizer
16
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
17
  model_id = "TheBloke/Yi-34B-200K-Llamafied-GPTQ"
18
 
19
- gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2})
20
- tokenizer = YiTokenizer.from_pretrained("./")
21
- model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
 
 
 
 
22
 
23
  def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
24
  prompt = message.strip()
 
3
  import os
4
  import gradio as gr
5
  import sentencepiece
6
+ # from tokenization_yi import YiTokenizer
7
 
8
 
9
  from transformers import AutoModelForCausalLM, GPTQConfig, AutoTokenizer, AutoModelForCausalLM
 
16
  os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
17
  model_id = "TheBloke/Yi-34B-200K-Llamafied-GPTQ"
18
 
19
+
20
+ tokenizer = AutoTokenizer.from_pretrained("larryvrh/Yi-34B-200K-Llamafied")
21
+ model = AutoModelForCausalLM.from_pretrained("larryvrh/Yi-34B-200K-Llamafied", device_map="auto", torch_dtype="bfloat16", trust_remote_code=True)
22
+
23
+ # gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2})
24
+ # tokenizer = YiTokenizer.from_pretrained("./")
25
+ # model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
26
 
27
  def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
28
  prompt = message.strip()