Tonic committed on
Commit 2696633
1 Parent(s): a5e2b1f

Update app.py

Files changed (1):
  1. app.py +2 -10
app.py CHANGED
@@ -9,18 +9,10 @@ from tokenization_yi import YiTokenizer
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
 model_id = "TheBloke/Yi-34B-200K-Llamafied-GPTQ"
 
-gptq_config = GPTQConfig(
-    bits=4,
-    exllama_config={"version": 2},
-    disable_exllama=True
-)
+gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2})
 tokenizer = YiTokenizer.from_pretrained("./")
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    device_map="auto",
-    quantization_config=gptq_config
-
-)
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda", quantization_config=gptq_config)
+
 def run(message, chat_history, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
     prompt = get_prompt(message, chat_history)
     input_ids = tokenizer.encode(prompt, return_tensors='pt')
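In context, the updated loading path looks like the sketch below. The previous code was self-contradictory: it requested the ExLlamaV2 kernels via exllama_config={"version": 2} while simultaneously passing disable_exllama=True, which turns the ExLlama kernels off. The commit drops the disabling flag so the v2 kernels are actually used, and pins the model to the GPU with device_map="cuda" instead of device_map="auto". This is a minimal sketch, assuming a recent transformers release with optimum and auto-gptq installed and tokenization_yi.py in the working directory; the generate() call at the end is an illustrative smoke test, not part of the commit.

# Sketch of the loading path after this commit. Assumes transformers
# with GPTQ support (optimum + auto-gptq installed); the final
# generate() call is illustrative only.
import os

from transformers import AutoModelForCausalLM, GPTQConfig
from tokenization_yi import YiTokenizer

# Cap the CUDA allocator's block size to reduce fragmentation.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'

model_id = "TheBloke/Yi-34B-200K-Llamafied-GPTQ"

# exllama_config={"version": 2} selects the ExLlamaV2 GPTQ kernels;
# the removed disable_exllama=True flag had disabled them entirely.
gptq_config = GPTQConfig(bits=4, exllama_config={"version": 2})

tokenizer = YiTokenizer.from_pretrained("./")
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",            # place the whole model on the GPU
    quantization_config=gptq_config,
)

# Hypothetical smoke test, not part of the commit.
input_ids = tokenizer.encode("Hello, Yi!", return_tensors='pt').to(model.device)
output = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))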