PY007 committed on
Commit
07bcbfe
•
1 Parent(s): 8eaca27

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -6
README.md CHANGED
@@ -31,7 +31,7 @@ Do check the [TinyLlama](https://github.com/jzhang38/TinyLlama) github page for
31
  from transformers import AutoTokenizer
32
  import transformers
33
  import torch
34
- model = "PY007/TinyLlama-1.1B-step-50K-105b"
35
  tokenizer = AutoTokenizer.from_pretrained(model)
36
  pipeline = transformers.pipeline(
37
  "text-generation",
@@ -40,14 +40,20 @@ pipeline = transformers.pipeline(
40
  device_map="auto",
41
  )
42
 
 
 
 
 
 
 
43
  sequences = pipeline(
44
- 'The TinyLlama project aims to pretrain a 1.1B Llama model on 3 trillion tokens. With some proper optimization, we can achieve this within a span of "just" 90 days using 16 A100-40G GPUs 🚀🚀. The training has started on 2023-09-01.',
45
  do_sample=True,
46
- top_k=10,
 
47
  num_return_sequences=1,
48
- repetition_penalty=1.5,
49
- eos_token_id=tokenizer.eos_token_id,
50
- max_length=500,
51
  )
52
  for seq in sequences:
53
  print(f"Result: {seq['generated_text']}")
 
31
  from transformers import AutoTokenizer
32
  import transformers
33
  import torch
34
+ model = "PY007/TinyLlama-1.1B-Chat-v0.1"
35
  tokenizer = AutoTokenizer.from_pretrained(model)
36
  pipeline = transformers.pipeline(
37
  "text-generation",
 
40
  device_map="auto",
41
  )
42
 
43
+ prompt = "What are the values in open source projects?"
44
+ formatted_prompt = (
45
+ f"### Human: {prompt} ### Assistant:"
46
+ )
47
+
48
+
49
  sequences = pipeline(
50
+ formatted_prompt,
51
  do_sample=True,
52
+ top_k=50,
53
+ top_p = 0.7,
54
  num_return_sequences=1,
55
+ repetition_penalty=1.1,
56
+ max_new_tokens=500,
 
57
  )
58
  for seq in sequences:
59
  print(f"Result: {seq['generated_text']}")