p1atdev commited on
Commit
b1c8d19
1 Parent(s): 0df470a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +25 -3
README.md CHANGED
@@ -48,6 +48,23 @@ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
48
  # rating:sfw, rating:general, 1girl, ahoge, braid, closed eyes, collared dress, dress, flower, full body, hair flower, hair ornament, long hair, night, night sky, outdoors, parted lips, pink flower, pink hair, short sleeves, sky, solo, straight hair, sunflower, very long hair, white flower
49
  ```
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  #### Flash attention (optional)
52
 
53
  Using flash attention can optimize computations, but it is currently only compatible with Linux.
@@ -86,11 +103,16 @@ ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME)
86
  # quantized version
87
  # ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, file_name="model_quantized.onnx")
88
 
89
- prompt = "<|bos|><rating>rating:sfw, rating:general</rating><copyright>original</copyright><character></character><general><|long|>1girl<|input_end|>"
90
- inputs = tokenizer(prompt, return_tensors="pt").input_ids
 
 
 
 
 
91
 
92
  with torch.no_grad():
93
- outputs = model.generate(inputs, generation_config=generation_config)
94
 
95
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))
96
  ```
 
48
  # rating:sfw, rating:general, 1girl, ahoge, braid, closed eyes, collared dress, dress, flower, full body, hair flower, hair ornament, long hair, night, night sky, outdoors, parted lips, pink flower, pink hair, short sleeves, sky, solo, straight hair, sunflower, very long hair, white flower
49
  ```
50
 
51
+ You can use `tokenizer.apply_chat_template` to simplify construction of prompts:
52
+
53
+ ```py
54
+ inputs = tokenizer.apply_chat_template({
55
+ "rating": "rating:sfw, rating:general",
56
+ "copyright": "original",
57
+ "character": "",
58
+ "general": "1girl",
59
+ "length": "<|long|>"
60
+ }, tokenize=True) # tokenize=False to preview prompt
61
+ # same as input_ids of "<|bos|><rating>rating:sfw, rating:general</rating><copyright>original</copyright><character></character><general><|long|>1girl<|input_end|>"
62
+ with torch.no_grad():
63
+ outputs = model.generate(inputs, generation_config=generation_config)
64
+ ```
65
+
66
+ See the [chat templating documentation](https://huggingface.co/docs/transformers/main/en/chat_templating) for more details about `apply_chat_template`.
67
+
68
  #### Flash attention (optional)
69
 
70
  Using flash attention can optimize computations, but it is currently only compatible with Linux.
 
103
  # quantized version
104
  # ort_model = ORTModelForCausalLM.from_pretrained(MODEL_NAME, file_name="model_quantized.onnx")
105
 
106
+ inputs = tokenizer.apply_chat_template({
107
+ "rating": "rating:sfw, rating:general",
108
+ "copyright": "original",
109
+ "character": "",
110
+ "general": "1girl",
111
+ "length": "<|long|>"
112
+ }, tokenize=True)
113
 
114
  with torch.no_grad():
115
+ outputs = ort_model.generate(inputs, generation_config=generation_config)
116
 
117
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))
118
  ```