wuyongyu committed on
Commit
d76c4a5
1 Parent(s): 8d84de9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +35 -2
README.md CHANGED
@@ -16,11 +16,44 @@ Llama3-Chinese-8B-Instruct基于Llama3-8B中文微调对话模型,由Llama中
16
  ```
17
  import transformers
18
  import torch
 
 
19
  model_id = "FlagAlpha/Llama3-Chinese-8B-Instruct"
20
 
21
  pipeline = transformers.pipeline(
22
- "text-generation", model=model_id, model_kwargs={"torch_dtype": torch.float16}, device_map="auto"
 
 
 
23
  )
24
 
25
- print(pipeline("介绍一下机器学习?"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  ```
 
16
  ```
17
  import transformers
18
  import torch
19
+
20
+
21
  model_id = "FlagAlpha/Llama3-Chinese-8B-Instruct"
22
 
23
  pipeline = transformers.pipeline(
24
+ "text-generation",
25
+ model=model_id,
26
+ model_kwargs={"torch_dtype": torch.float16},
27
+ device="cuda",
28
  )
29
 
30
+
31
+ messages = [{"role": "system", "content": ""}]
32
+
33
+ messages.append(
34
+ {"role": "user", "content": "介绍一下机器学习"}
35
+ )
36
+
37
+ prompt = pipeline.tokenizer.apply_chat_template(
38
+ messages,
39
+ tokenize=False,
40
+ add_generation_prompt=True
41
+ )
42
+
43
+ terminators = [
44
+ pipeline.tokenizer.eos_token_id,
45
+ pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
46
+ ]
47
+ outputs = pipeline(
48
+ prompt,
49
+ max_new_tokens=512,
50
+ eos_token_id=terminators,
51
+ do_sample=True,
52
+ temperature=0.6,
53
+ top_p=0.9
54
+ )
55
+
56
+ content = outputs[0]["generated_text"][len(prompt):]
57
+
58
+ print(content)
59
  ```