decula committed on
Commit
8ecf1c8
·
1 Parent(s): 7a88d5b

Added safetensors

Browse files
Files changed (1) hide show
  1. qianwen_rag.py +17 -9
qianwen_rag.py CHANGED
@@ -13,7 +13,13 @@ HAS_GPU = False
13
  # Model title and context size limit
14
  ctx_limit = 20000
15
  title = "Qwen2-72B-Instruct-2.0bpw-h-novel-exl2 with RAG"
16
- model_repo = "Orion-zhen/Qwen2-72B-Instruct-2.0bpw-h-novel-exl2"
 
 
 
 
 
 
17
 
18
  # Get the GPU count
19
  try:
@@ -31,20 +37,22 @@ try:
31
  except NVMLError as error:
32
  print(error)
33
 
34
- # Load the model using transformers
35
- print(f"正在加载模型: {model_repo}")
36
-
37
  # 设置设备配置
38
  device = "cpu"
39
  if HAS_GPU:
40
  device = "cuda"
41
 
42
- # 加载模型和分词器
43
- tokenizer = AutoTokenizer.from_pretrained(model_repo)
44
- model = AutoModelForCausalLM.from_pretrained(model_repo)
 
 
 
 
 
 
45
 
46
- # 将模型移动到适当的设备
47
- model = model.to(device)
48
 
49
  # 理解问题并提取关键词的函数
50
  async def understanding_question(question: str):
 
13
  # Model title and context size limit
14
  ctx_limit = 20000
15
  title = "Qwen2-72B-Instruct-2.0bpw-h-novel-exl2 with RAG"
16
+
17
+ # 设置模型文件路径
18
+ model_files = [
19
+ "output-00001-of-00003.safetensors",
20
+ "output-00002-of-00003.safetensors",
21
+ "output-00003-of-00003.safetensors"
22
+ ]
23
 
24
  # Get the GPU count
25
  try:
 
37
  except NVMLError as error:
38
  print(error)
39
 
 
 
 
40
  # 设置设备配置
41
  device = "cpu"
42
  if HAS_GPU:
43
  device = "cuda"
44
 
45
+ print("正在加载模型文件...")
46
+
47
+ # 直接从本地文件加载模型
48
+ tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
49
+ model = AutoModelForCausalLM.from_pretrained(
50
+ ".",
51
+ local_files_only=True,
52
+ device_map=device
53
+ )
54
 
55
+ print("模型加载完成")
 
56
 
57
  # 理解问题并提取关键词的函数
58
  async def understanding_question(question: str):