viethoangtranduong committed
Commit 15b9b8a • Parent(s): d76b839
Update README.md

README.md CHANGED
@@ -48,28 +48,48 @@ This requires a GPU with 16GB memory.
 import torch
 import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
+```
 
-MIN_TRANSFORMERS_VERSION = '…'
-
-# check transformers version
-assert transformers.__version__ >= MIN_TRANSFORMERS_VERSION, f'Please upgrade transformers to version {MIN_TRANSFORMERS_VERSION} or higher.'
-
-# init
+Example with RedPajama-7B-Chat-Curated
+```python
+# Example 1 using RedPajama-7B-Chat-Curated
 tokenizer = AutoTokenizer.from_pretrained("snorkelai/RedPajama-7B-Chat-Curated")
 model = AutoModelForCausalLM.from_pretrained("snorkelai/RedPajama-7B-Chat-Curated", torch_dtype=torch.float16)
 model = model.to('cuda:0')
-# infer
-prompt = "<human>: …\n<bot>:"
+
+## inference
+prompt = "<human>: {EDIT_NEEDED}.\n<bot>:"
+inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
+input_length = inputs.input_ids.shape[1]
+outputs = model.generate(
+  **inputs, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.7, top_k=50,
+)
+token = outputs.sequences[0, input_length:]
+output_str = tokenizer.decode(token)
+print(output_str)
+"""{EDIT_NEEDED}"""
+```
+
+Comparing with RedPajama-INCITE-7B-Chat
+```python
+# Example 1 using RedPajama-INCITE-7B-Chat
+tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-INCITE-7B-Chat")
+model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-INCITE-7B-Chat", torch_dtype=torch.float16)
+model = model.to('cuda:0')
+
+## inference
+prompt = "<human>: {EDIT_NEEDED}.\n<bot>:"
 inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
 input_length = inputs.input_ids.shape[1]
 outputs = model.generate(
-  **inputs, max_new_tokens=…
+  **inputs, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.7, top_k=50,
 )
 token = outputs.sequences[0, input_length:]
 output_str = tokenizer.decode(token)
 print(output_str)
+"""{EDIT_NEEDED}"""
 """
-…
+
 """
 ```
 
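For reference, the Curated example this commit introduces can be assembled into a single runnable script. This is a sketch, not part of the commit: the prompt below is an arbitrary stand-in for the README's {EDIT_NEEDED} placeholder, and `return_dict_in_generate=True` is added because the committed code indexes `outputs.sequences`, which `generate` only returns when that flag is set.

```python
# Minimal runnable sketch of the post-commit README example.
# Assumes a recent transformers release and a CUDA GPU with ~16GB memory.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("snorkelai/RedPajama-7B-Chat-Curated")
model = AutoModelForCausalLM.from_pretrained(
    "snorkelai/RedPajama-7B-Chat-Curated", torch_dtype=torch.float16
).to("cuda:0")

# The README's <human>/<bot> chat format; this question is a hypothetical
# stand-in for the {EDIT_NEEDED} placeholder in the diff.
prompt = "<human>: Who is Alan Turing?\n<bot>:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
input_length = inputs.input_ids.shape[1]

# Sampling parameters as committed; return_dict_in_generate=True is needed
# so that outputs.sequences exists (otherwise generate returns a plain tensor).
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.7,
    top_k=50,
    return_dict_in_generate=True,
)

# Decode only the newly generated tokens, skipping the prompt.
token = outputs.sequences[0, input_length:]
print(tokenizer.decode(token))
```

The INCITE comparison block in the diff is identical apart from the checkpoint name, so swapping in "togethercomputer/RedPajama-INCITE-7B-Chat" reproduces it.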