ffreemt committed
Commit
7725b42
1 Parent(s): 37f4d69
Files changed (2):
  1. app.py +45 -6
  2. requirements.txt +4 -0
app.py CHANGED
@@ -2,14 +2,53 @@
 
 transformers 4.31.0
 """
-from transformers import AutoModelForCausalLM, AutoTokenizer
+import os
+import time
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
 from transformers.generation import GenerationConfig
+from transformers import BitsAndBytesConfig
+from loguru import logger
 
-tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
+os.environ["TZ"] = "Asia/Shanghai"
+try:
+    time.tzset()  # type: ignore # pylint: disable=no-member
+except Exception:
+    # Windows has no time.tzset()
+    logger.warning("Windows, can't run time.tzset()")
 
-model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True).eval()
-model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)  # different generation length, top_p and other related hyperparameters can be specified
+device_map = "cuda:0" if torch.cuda.is_available() else "cpu"
+# has_cuda = False  # force cpu
 
-response, history = model.chat(tokenizer, "你好", history=None)
-# response, history = model.chat(tokenizer, "你好", history=[])
+model_name = "Qwen/Qwen-7B-Chat"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
+# quantization configuration for NF4 (4 bits)
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type='nf4',
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
+# quantization configuration for Int8 (8 bits); assigned last, so this one takes effect
+quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map=device_map,
+    quantization_config=quantization_config,
+    # max_memory=max_memory,
+    trust_remote_code=True,
+).eval()
+
+# model = model.eval()
+
+# model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-7B-Chat", device_map="auto", trust_remote_code=True).eval()
+
+# different generation length, top_p and other related hyperparameters can be specified
+model.generation_config = GenerationConfig.from_pretrained("Qwen/Qwen-7B-Chat", trust_remote_code=True)
+
+# response, history = model.chat(tokenizer, "你好", history=None)
+response, history = model.chat(tokenizer, "你好", history=[])
 print(response)
+
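Note that app.py now builds two BitsAndBytesConfig objects; because the Int8 config is assigned to quantization_config last, it is the one actually passed to from_pretrained, and the NF4 config is effectively dead code. A minimal sketch of selecting between the two explicitly (the QUANT_BITS environment variable is hypothetical, introduced here for illustration, not part of this commit):

# Sketch, not part of the commit: pick the quantization config explicitly
# instead of letting the second assignment shadow the first.
# QUANT_BITS is a hypothetical env var used only for this illustration.
import os
import torch
from transformers import BitsAndBytesConfig

if os.environ.get("QUANT_BITS", "8") == "4":
    # NF4 (4-bit) quantization, as in the commit's first config
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
else:
    # Int8 (8-bit) quantization, the config the commit actually uses
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)

Either config generally requires a CUDA-capable GPU; on the CPU path (device_map = "cpu") bitsandbytes quantization is typically unavailable, so quantization_config would likely need to be omitted there.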
requirements.txt CHANGED
@@ -11,3 +11,7 @@ einops
 
 torch  # 2.0.1
 safetensors
+bitsandbytes
+
+loguru
+about-time
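The new entries back the changes in app.py: bitsandbytes supplies the Int8/NF4 quantization backends and loguru the logging; about-time is a timing helper, presumably used elsewhere in the Space, as it is not referenced in this diff. A quick import sanity check, as a sketch (note that the about-time package installs the module about_time):

# Sketch: verify the newly added requirements are installed and importable.
# The about-time package imports as `about_time`; the others match their names.
import bitsandbytes  # noqa: F401
import loguru  # noqa: F401
import about_time  # noqa: F401

from importlib.metadata import version
for pkg in ("bitsandbytes", "loguru", "about-time"):
    print(pkg, version(pkg))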