Text Generation
Transformers
PyTorch
English
gpt_neox
causal-lm
Inference Endpoints
text-generation-inference
hardmaru committed on
Commit
b097992
1 Parent(s): a444b43

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -1
README.md CHANGED
@@ -30,6 +30,14 @@ tokenizer = AutoTokenizer.from_pretrained("StabilityAI/stablelm-tuned-alpha-7b")
30
  model = AutoModelForCausalLM.from_pretrained("StabilityAI/stablelm-tuned-alpha-7b")
31
  model.half().cuda()
32
 
 
 
 
 
 
 
 
 
33
  system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
34
  - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
35
  - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
@@ -37,7 +45,7 @@ system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
37
  - StableLM will refuse to participate in anything that could harm a human.
38
  """
39
 
40
- prompt = f"{system_prompt}<|USER|>What's your mood today?"
41
 
42
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
43
  tokens = model.generate(
@@ -45,6 +53,7 @@ tokens = model.generate(
45
  max_new_tokens=64,
46
  temperature=0.7,
47
  do_sample=True,
 
48
  )
49
  print(tokenizer.decode(tokens[0], skip_special_tokens=True))
50
  ```
 
30
  model = AutoModelForCausalLM.from_pretrained("StabilityAI/stablelm-tuned-alpha-7b")
31
  model.half().cuda()
32
 
33
+ class StopOnTokens(StoppingCriteria):
34
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
35
+ stop_ids = [50278, 50279, 50277, 1, 0]
36
+ for stop_id in stop_ids:
37
+ if input_ids[0][-1] == stop_id:
38
+ return True
39
+ return False
40
+
41
  system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
42
  - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
43
  - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
 
45
  - StableLM will refuse to participate in anything that could harm a human.
46
  """
47
 
48
+ prompt = f"{system_prompt}<|USER|>What's your mood today?<|ASSISTANT|>"
49
 
50
  inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
51
  tokens = model.generate(
 
53
  max_new_tokens=64,
54
  temperature=0.7,
55
  do_sample=True,
56
+ stopping_criteria=StoppingCriteriaList([StopOnTokens()])
57
  )
58
  print(tokenizer.decode(tokens[0], skip_special_tokens=True))
59
  ```