xzyao committed
Commit 6d707b7
Parent: c076b6d

update readme

Files changed (1): README.md (+63 -11)
README.md CHANGED
@@ -17,20 +17,39 @@ RedPajama-Base-INCITE-6.9B-v1, is a large transformer-based language model devel
 
 # Quick Start
 
+Please note that the model requires `transformers` version >= 4.25.1.
+
 ## GPU Inference
 
 This requires a GPU with 16GB memory.
+
 ```python
+import torch
+import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
+
+MIN_TRANSFORMERS_VERSION = '4.25.1'
+
+# check transformers version
+assert transformers.__version__ >= MIN_TRANSFORMERS_VERSION, f'Please upgrade transformers to version {MIN_TRANSFORMERS_VERSION} or higher.'
+
 # init
 tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Base-INCITE-6.9B-v1")
 model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Base-INCITE-6.9B-v1", torch_dtype=torch.float16)
 model = model.to('cuda:0')
 # infer
-inputs = tokenizer("Hello", return_tensors='pt').to(model.device)
-outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
-output_str = tokenizer.decode(outputs[0])
+prompt = "Alan Turing is"
+inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
+input_length = inputs.input_ids.shape[1]
+outputs = model.generate(
+    **inputs, max_new_tokens=128, do_sample=True, temperature=0.7, top_p=0.7, top_k=50, return_dict_in_generate=True
+)
+token = outputs.sequences[0, input_length:]
+output_str = tokenizer.decode(token)
 print(output_str)
+"""
+widely considered to be the father of modern computer science and artificial intelligence. He was a brilliant mathematician and cryptographer, who worked for the British government during World War II. He was instrumental in breaking the German Enigma code, and is credited with helping to shorten the war by two years...
+"""
 ```
 
 ## GPU Inference in Int8
@@ -38,31 +57,65 @@
 This requires a GPU with 12GB memory.
 
 ```python
+import torch
+import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
+
+MIN_TRANSFORMERS_VERSION = '4.25.1'
+
+# check transformers version
+assert transformers.__version__ >= MIN_TRANSFORMERS_VERSION, f'Please upgrade transformers to version {MIN_TRANSFORMERS_VERSION} or higher.'
+
 # init
 tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Base-INCITE-6.9B-v1")
-model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Base-INCITE-6.9B-v1", device_map="auto", load_in_8bit=True)
+model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Base-INCITE-6.9B-v1", device_map='auto', torch_dtype=torch.float16, load_in_8bit=True)
+
 # infer
-inputs = tokenizer("Hello", return_tensors='pt').to(model.device)
-outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
-output_str = tokenizer.decode(outputs[0])
+prompt = "Alan Turing is"
+inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
+input_length = inputs.input_ids.shape[1]
+outputs = model.generate(
+    **inputs, max_new_tokens=128, do_sample=True, temperature=0.7, top_p=0.7, top_k=50, return_dict_in_generate=True
+)
+token = outputs.sequences[0, input_length:]
+output_str = tokenizer.decode(token)
 print(output_str)
+"""
+a very well-known name in the world of computer science. It is named after the mathematician Alan Turing. He is famous for his work on the Enigma machine, which was used by the Germans during World War II....
+"""
 ```
 
 ## CPU Inference
 
 ```python
+import torch
+import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM
+
+MIN_TRANSFORMERS_VERSION = '4.25.1'
+
+# check transformers version
+assert transformers.__version__ >= MIN_TRANSFORMERS_VERSION, f'Please upgrade transformers to version {MIN_TRANSFORMERS_VERSION} or higher.'
+
 # init
 tokenizer = AutoTokenizer.from_pretrained("togethercomputer/RedPajama-Base-INCITE-6.9B-v1")
 model = AutoModelForCausalLM.from_pretrained("togethercomputer/RedPajama-Base-INCITE-6.9B-v1", torch_dtype=torch.bfloat16)
 # infer
-inputs = tokenizer("<human>: Hello!\n<bot>:", return_tensors='pt').to(model.device)
-outputs = model.generate(**inputs, max_new_tokens=10, do_sample=True, temperature=0.8)
-output_str = tokenizer.decode(outputs[0])
+prompt = "Alan Turing is"
+inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
+input_length = inputs.input_ids.shape[1]
+outputs = model.generate(
+    **inputs, max_new_tokens=128, do_sample=True, temperature=0.7, top_p=0.7, top_k=50, return_dict_in_generate=True
+)
+token = outputs.sequences[0, input_length:]
+output_str = tokenizer.decode(token)
 print(output_str)
+"""
+one of the most important figures in the history of computing. He is best known for his work on the development of the modern computer and for his code-breaking work during World War II. He was also a brilliant mathematician and philosopher.
+"""
 ```
 
+Please note that since `LayerNormKernelImpl` is not implemented in fp16 for CPU, we use `bfloat16` for CPU inference.
 
 # Uses
 
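One caveat on the version check added in this commit: `transformers.__version__ >= MIN_TRANSFORMERS_VERSION` compares plain strings, so a version such as '4.9.0' would incorrectly pass as newer than '4.25.1'. A more robust check, as a minimal sketch assuming the `packaging` library is available (it usually ships alongside pip/setuptools):

```python
import transformers
from packaging import version  # assumption: `packaging` is installed

MIN_TRANSFORMERS_VERSION = '4.25.1'

# Compare parsed versions rather than raw strings,
# so that e.g. '4.9.0' correctly sorts below '4.25.1'.
assert version.parse(transformers.__version__) >= version.parse(MIN_TRANSFORMERS_VERSION), \
    f'Please upgrade transformers to version {MIN_TRANSFORMERS_VERSION} or higher.'
```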
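Note also that the Int8 example relies on two optional packages the README does not list: `load_in_8bit=True` is backed by `bitsandbytes`, and `device_map='auto'` requires `accelerate`. A minimal pre-flight check, as a sketch:

```python
import importlib.util

# load_in_8bit=True is backed by bitsandbytes; device_map='auto' needs accelerate.
for pkg in ('accelerate', 'bitsandbytes'):
    if importlib.util.find_spec(pkg) is None:
        raise ImportError(f"'{pkg}' is required for the Int8 example; install it with pip.")
```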