---
license: other
language:
- en
pipeline_tag: text-generation
tags:
- gguf
- imatrix
- stable-code-3b
- stabilityai
---
Quantizations of https://huggingface.co/stabilityai/stable-code-3b
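
The GGUF files in this repo can be run with any llama.cpp-based tool. As a minimal sketch using `llama-cpp-python` (the quantization file name below is an assumption; substitute whichever quant you actually downloaded):

```python
# Minimal sketch: load one of the GGUF quantizations with llama-cpp-python.
# The file name is an assumption; use the quant file you downloaded.
from llama_cpp import Llama

llm = Llama(model_path="stable-code-3b.Q4_K_M.gguf", n_ctx=4096)
out = llm("import torch\nimport torch.nn as nn", max_tokens=48, temperature=0.2)
print(out["choices"][0]["text"])
```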

# From original readme

## Usage

Get started generating text with `stable-code-3b` by using the following code snippet:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-3b")
model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stable-code-3b",
    torch_dtype="auto",
)
model.cuda()
inputs = tokenizer("import torch\nimport torch.nn as nn", return_tensors="pt").to(model.device)
tokens = model.generate(
    **inputs,
    max_new_tokens=48,
    temperature=0.2,
    do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
```

### Run with Fill in Middle (FIM) ⚡️

<details>
<summary> Click to expand </summary>

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-3b")
model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stable-code-3b",
    torch_dtype="auto",
    attn_implementation="flash_attention_2",
)
model.cuda()
inputs = tokenizer("<fim_prefix>def fib(n):<fim_suffix>    else:\n        return fib(n - 2) + fib(n - 1)<fim_middle>", return_tensors="pt").to(model.device)
tokens = model.generate(
    **inputs,
    max_new_tokens=48,
    temperature=0.2,
    do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
```

</details>
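
For reference, the FIM prompt above is just three special tokens wrapped around the known code. A hypothetical helper (not part of the original readme) that builds prompts in this format:

```python
def build_fim_prompt(prefix: str, suffix: str) -> str:
    # stable-code-3b fills in the middle: it sees the code before the gap
    # (prefix) and after it (suffix), then generates the missing middle.
    return f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"

prompt = build_fim_prompt("def fib(n):", "    else:\n        return fib(n - 2) + fib(n - 1)")
```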

### Run with Flash Attention 2 ⚡️

<details>
<summary> Click to expand </summary>

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-3b", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stable-code-3b",
    trust_remote_code=True,
    torch_dtype="auto",
    attn_implementation="flash_attention_2",
)
model.cuda()
inputs = tokenizer("import torch\nimport torch.nn as nn", return_tensors="pt").to(model.device)
tokens = model.generate(
    **inputs,
    max_new_tokens=48,
    temperature=0.2,
    do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
```

</details>
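
Note that `attn_implementation="flash_attention_2"` only works when the separate `flash-attn` package is installed and a supported GPU is available. A hedged fallback sketch (the probing logic here is my own, not from the original readme):

```python
import importlib.util
from transformers import AutoModelForCausalLM

# Use Flash Attention 2 only if the optional flash-attn package is present;
# otherwise pass None so transformers picks its default attention backend.
attn_impl = "flash_attention_2" if importlib.util.find_spec("flash_attn") else None

model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stable-code-3b",
    torch_dtype="auto",
    attn_implementation=attn_impl,
)
```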