decula committed on
Commit ·
8ecf1c8
1
Parent(s): 7a88d5b
Added safetensors
Browse files- qianwen_rag.py +17 -9
qianwen_rag.py
CHANGED
|
@@ -13,7 +13,13 @@ HAS_GPU = False
|
|
| 13 |
# Model title and context size limit
|
| 14 |
ctx_limit = 20000
|
| 15 |
title = "Qwen2-72B-Instruct-2.0bpw-h-novel-exl2 with RAG"
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# Get the GPU count
|
| 19 |
try:
|
|
@@ -31,20 +37,22 @@ try:
|
|
| 31 |
except NVMLError as error:
|
| 32 |
print(error)
|
| 33 |
|
| 34 |
-
# Load the model using transformers
|
| 35 |
-
print(f"正在加载模型: {model_repo}")
|
| 36 |
-
|
| 37 |
# 设置设备配置
|
| 38 |
device = "cpu"
|
| 39 |
if HAS_GPU:
|
| 40 |
device = "cuda"
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
|
| 47 |
-
model = model.to(device)
|
| 48 |
|
| 49 |
# 理解问题并提取关键词的函数
|
| 50 |
async def understanding_question(question: str):
|
|
|
|
| 13 |
# Model title and context size limit
|
| 14 |
ctx_limit = 20000
|
| 15 |
title = "Qwen2-72B-Instruct-2.0bpw-h-novel-exl2 with RAG"
|
| 16 |
+
|
| 17 |
+
# 设置模型文件路径
|
| 18 |
+
model_files = [
|
| 19 |
+
"output-00001-of-00003.safetensors",
|
| 20 |
+
"output-00002-of-00003.safetensors",
|
| 21 |
+
"output-00003-of-00003.safetensors"
|
| 22 |
+
]
|
| 23 |
|
| 24 |
# Get the GPU count
|
| 25 |
try:
|
|
|
|
| 37 |
except NVMLError as error:
|
| 38 |
print(error)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
| 40 |
# 设置设备配置
|
| 41 |
device = "cpu"
|
| 42 |
if HAS_GPU:
|
| 43 |
device = "cuda"
|
| 44 |
|
| 45 |
+
print("正在加载模型文件...")
|
| 46 |
+
|
| 47 |
+
# 直接从本地文件加载模型
|
| 48 |
+
tokenizer = AutoTokenizer.from_pretrained(".", local_files_only=True)
|
| 49 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 50 |
+
".",
|
| 51 |
+
local_files_only=True,
|
| 52 |
+
device_map=device
|
| 53 |
+
)
|
| 54 |
|
| 55 |
+
print("模型加载完成")
|
|
|
|
| 56 |
|
| 57 |
# 理解问题并提取关键词的函数
|
| 58 |
async def understanding_question(question: str):
|