Update README.md
open_llama_7b_v2_vicuna_Chinese is a chat model supervised-finetuned on vicuna ShareGPT data.
Example usage:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model, and move the model to the GPU.
tokenizer = AutoTokenizer.from_pretrained("PengQu/open_llama_7b_v2_vicuna_Chinese", use_fast=False)
model = AutoModelForCausalLM.from_pretrained("PengQu/open_llama_7b_v2_vicuna_Chinese").to("cuda")

# Vicuna-style prompt template; the user message is substituted for {}.
instruction = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {} ASSISTANT:"
# Ask the model (in Chinese) to write a simple HTTP server with Flask.
prompt = instruction.format('用flask写一个简单的http服务器。')
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")

generation_output = model.generate(input_ids=input_ids, max_new_tokens=512)
print(tokenizer.decode(generation_output[0], skip_special_tokens=True))
```
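If the full-precision checkpoint is too large for your GPU, loading the weights in half precision is a common workaround. The sketch below is a suggestion rather than part of the model card: `torch_dtype=torch.float16` is a standard `transformers` `from_pretrained` argument, and the memory figures in the comments are rough estimates.

```python
import torch
from transformers import AutoModelForCausalLM

# Half-precision loading roughly halves weight memory for a 7B model
# (~14 GB in fp16 vs ~28 GB in fp32). Assumption: your GPU and driver
# stack support fp16 inference.
model = AutoModelForCausalLM.from_pretrained(
    "PengQu/open_llama_7b_v2_vicuna_Chinese",
    torch_dtype=torch.float16,  # load weights in half precision
).to("cuda")
```

Generation then works exactly as in the example above; only the load step changes.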