rphrp1985 committed
Commit c535860 · verified · 1 Parent(s): addc716

Update app.py

Files changed (1): app.py +9 -9
app.py CHANGED
@@ -27,10 +27,18 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # model_id = "mistralai/Mistral-7B-v0.3"
 
-model_id = "CohereForAI/c4ai-command-r-plus-4bit"
+model_id = "CohereForAI/c4ai-command-r-plus"
 
 
+tokenizer = AutoTokenizer.from_pretrained(model_id, token= token)
 
+model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+    # low_cpu_mem_usage=True,
+    # llm_int8_enable_fp32_cpu_offload=True,
+    device_map="cuda"
+)
 
 
 #
@@ -46,15 +54,7 @@ def respond(
     temperature,
     top_p,
 ):
-    tokenizer = AutoTokenizer.from_pretrained(model_id, token= token)
 
-    model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
-        # torch_dtype=torch.bfloat16,
-        # attn_implementation="flash_attention_2",
-        # low_cpu_mem_usage=True,
-        # llm_int8_enable_fp32_cpu_offload=True,
-        device_map="cuda"
-    )
     messages = [{"role": "user", "content": "Hello, how are you?"}]
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
     ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
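
For reference, a minimal standalone sketch of the loading path this commit hoists to module scope, with the bf16 and FlashAttention-2 options it enables. It is not part of the commit; it assumes a Hugging Face access token with access to the gated CohereForAI/c4ai-command-r-plus repo (read here from a hypothetical HF_TOKEN environment variable, standing in for however app.py defines `token`), a CUDA GPU with enough memory for the ~104B-parameter model in bf16, and the flash-attn package installed.

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in for however app.py obtains `token`; HF_TOKEN is an assumption.
token = os.environ["HF_TOKEN"]

model_id = "CohereForAI/c4ai-command-r-plus"

# After this commit, loading happens once at import time rather than on
# every respond() call.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,
    torch_dtype=torch.bfloat16,               # full checkpoint in bf16, replacing the prequantized 4-bit repo
    attn_implementation="flash_attention_2",  # requires the flash-attn package
    device_map="cuda",
)

# Same smoke test as in respond(): render the chat template and generate.
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to("cuda")
output = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))

One trade-off worth noting: loading at module scope avoids re-downloading and re-initializing the weights on every request, but swapping the 4-bit checkpoint for the full model in bf16 raises GPU memory requirements substantially.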