
ChatYuan-7B-merge

Links: bilibili · github · kaggle · huggingface

A Chinese-English dialogue large language model based on LLaMA.


You can find more details in this repo.


How to use

from transformers import LlamaForCausalLM, AutoTokenizer
import torch

ckpt = "tiansz/ChatYuan-7B-merge"
device = torch.device('cuda')

# Load the model and move it to the GPU so it matches the input tensors below.
model = LlamaForCausalLM.from_pretrained(ckpt).to(device)
tokenizer = AutoTokenizer.from_pretrained(ckpt)

def answer(prompt):
    # Wrap the user input in the 用户/小元 dialogue format the model expects.
    prompt = f"用户:{prompt}\n小元:"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    generate_ids = model.generate(input_ids, max_new_tokens=1024, do_sample=True, temperature=0.7)
    output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    # Strip the echoed prompt so only the model's reply is returned.
    response = output[len(prompt):]
    return response

result = answer("你好")
print(result)
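The answer() helper above handles a single turn. For multi-turn dialogue, one common approach with this 用户/小元 prompt format is to concatenate earlier turns into the prompt. The sketch below assumes turns are simply joined with newlines; that convention is an assumption, not something this card confirms, so adjust it if the upstream repo documents a different format:

def chat(prompt, history=None):
    # history: list of (user, bot) pairs from earlier turns.
    # NOTE: joining turns with newlines is an assumed convention.
    history = history or []
    context = "".join(f"用户:{u}\n小元:{b}\n" for u, b in history)
    full_prompt = f"{context}用户:{prompt}\n小元:"
    input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(device)
    generate_ids = model.generate(input_ids, max_new_tokens=1024, do_sample=True, temperature=0.7)
    output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    return output[len(full_prompt):]

# Example: carry one earlier exchange into the next turn.
history = [("你好", chat("你好"))]
print(chat("你能做什么?", history))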

Inference with int8 quantization:
from transformers import LlamaForCausalLM, AutoTokenizer
import torch

ckpt = "tiansz/ChatYuan-7B-merge"
device = torch.device('cuda')

# Leave ~1GB of headroom per GPU when sharding the model across devices.
max_memory = f'{int(torch.cuda.mem_get_info()[0]/1024**3)-1}GB'
n_gpus = torch.cuda.device_count()
max_memory = {i: max_memory for i in range(n_gpus)}

# 8-bit loading requires the bitsandbytes package; device_map='auto'
# places the quantized weights across the available GPUs.
model = LlamaForCausalLM.from_pretrained(ckpt, device_map='auto', load_in_8bit=True, max_memory=max_memory)
tokenizer = AutoTokenizer.from_pretrained(ckpt)

def answer(prompt):
    # Same 用户/小元 dialogue format as above.
    prompt = f"用户:{prompt}\n小元:"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    generate_ids = model.generate(input_ids, max_new_tokens=1024, do_sample=True, temperature=0.7)
    output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    response = output[len(prompt):]
    return response

result = answer("你好")
print(result)
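If 8-bit weights still do not fit in memory, recent versions of transformers also support 4-bit loading through bitsandbytes. This is a minimal sketch, assuming a transformers release with load_in_4bit support and bitsandbytes installed; it is not part of the original card:

from transformers import LlamaForCausalLM, AutoTokenizer

ckpt = "tiansz/ChatYuan-7B-merge"

# 4-bit quantized load; roughly halves the footprint of the 8-bit setup above.
model = LlamaForCausalLM.from_pretrained(ckpt, device_map='auto', load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(ckpt)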

License
