import os
from string import Template
from typing import List, Dict
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import AutoPeftModelForCausalLM
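# Module-level handles for the model and tokenizer, initialized lazily by
# init_client(); END_POINT is a HuggingFace mirror endpoint used for downloads.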
aclient = None
client = None
tokenizer = None
END_POINT = "https://hf-mirror.com"


def init_client(model_name: str, verbose: bool) -> None:
    """
    Initialize the model, loading it for inference on whichever device is available.

    Params:
        model_name (`str`)
            Name of the model repo on HuggingFace, e.g. "THUDM/chatglm3-6b"
    """
    # Expose the client and tokenizer as module-level globals.
    global client
    global tokenizer
    # Choose a device: prefer CUDA, then Apple MPS, and fall back to CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    if verbose:
        print("Using device: ", device)
    # TODO: once the model is uploaded, load it from HuggingFace instead.
    client = AutoPeftModelForCausalLM.from_pretrained(
        model_name, trust_remote_code=True)
    tokenizer_dir = client.peft_config['default'].base_model_name_or_path
    if verbose:
        print(tokenizer_dir)
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_dir, trust_remote_code=True)
    # Fallback path for non-PEFT checkpoints: try a local load first, then
    # download the model on failure.
    # try:
    #     tokenizer = AutoTokenizer.from_pretrained(
    #         model_name, trust_remote_code=True, local_files_only=True)
    #     client = AutoModelForCausalLM.from_pretrained(
    #         model_name, trust_remote_code=True, local_files_only=True)
    # except Exception:
    #     if pretrained_model_download(model_name, verbose=verbose):
    #         tokenizer = AutoTokenizer.from_pretrained(
    #             model_name, trust_remote_code=True, local_files_only=True)
    #         client = AutoModelForCausalLM.from_pretrained(
    #             model_name, trust_remote_code=True, local_files_only=True)
    # Move the model to the selected device and switch to inference mode.
    client = client.to(device).eval()
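
# A minimal usage sketch for init_client, reusing the local PEFT checkpoint
# path that get_response defaults to below:
# init_client("/workspace/jyh/Zero-Haruhi/checkpoint-1500", verbose=True)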


def pretrained_model_download(model_name_or_path: str, verbose: bool) -> bool:
    """
    Download a model (model_name_or_path) with huggingface_hub.
    Returns True on success and False on failure.

    Params:
        model_name_or_path (`str`): HuggingFace repo id of the model
    Returns:
        `bool` whether the download succeeded
    """
    # TODO: the HF mirror speeds up downloads, but this is untested on Windows.
    # Decide whether to use hf_transfer; disabled by default. Note that
    # environment variables are strings, so compare against "1", not the int 1.
    if os.getenv("HF_HUB_ENABLE_HF_TRANSFER") == "1":
        try:
            import hf_transfer
        except ImportError:
            print("Install hf_transfer.")
            os.system("pip -q install hf_transfer")
            import hf_transfer
    # Try to import huggingface_hub, installing it on demand.
    try:
        import huggingface_hub
    except ImportError:
        print("Install huggingface_hub.")
        os.system("pip -q install huggingface_hub")
        import huggingface_hub
    # Download the model with huggingface_hub; return False on failure so the
    # caller can react, as the docstring promises.
    try:
        print(f"downloading {model_name_or_path}")
        huggingface_hub.snapshot_download(
            repo_id=model_name_or_path, endpoint=END_POINT,
            resume_download=True, local_dir_use_symlinks=False)
    except Exception as e:
        print(f"download failed: {e}")
        return False
    return True
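
# Example (repo id borrowed from the init_client docstring above; the files
# are fetched through the END_POINT mirror):
# if pretrained_model_download("THUDM/chatglm3-6b", verbose=True):
#     print("model downloaded")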


def message2query(messages: List[Dict[str, str]]) -> str:
    # Render OpenAI-style messages into the ChatGLM3 chat format, e.g.
    # [{'role': 'user', 'content': 'Teacher: Please introduce yourself'}]
    # The target format looks like:
    # <|system|>
    # You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown.
    # <|user|>
    # Hello
    # <|assistant|>
    # Hello, I'm ChatGLM3. What can I assist you today?
    template = Template("<|$role|>\n$content\n")
    return "".join([template.substitute(message) for message in messages])


def get_response(message: List[Dict[str, str]], model_name: str = "/workspace/jyh/Zero-Haruhi/checkpoint-1500", verbose: bool = True):
    global client
    global tokenizer
    # Lazily initialize the model on the first call.
    if client is None:
        init_client(model_name, verbose=verbose)
    if verbose:
        print(message)
        print(message2query(message))
    response, history = client.chat(tokenizer, message2query(message))
    if verbose:
        print((response, history))
    return response
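

if __name__ == "__main__":
    # Minimal smoke test. This assumes the default checkpoint path above exists
    # on this machine; the message content is an illustrative example.
    demo_messages = [{"role": "user", "content": "Teacher: Please introduce yourself"}]
    print(get_response(demo_messages))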