|
import torch |
|
import gradio as gr |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList |
|
|
|
# Load the tokenizer and the causal language model for the tuned StableLM
# checkpoint; both are looked up by the same model identifier.
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")

# Cast the weights to half precision, then move the model to the CUDA device.
# Both calls modify the module in place.
model.half()
model.cuda()
|
|
|
class StopOnTokens(StoppingCriteria):
    """Stopping criterion that halts generation when a stop token is emitted.

    Only the most recent token of the first sequence in the batch
    (``input_ids[0][-1]``) is inspected.

    Args:
        stop_ids: Iterable of token IDs that should end generation. When
            omitted, ``DEFAULT_STOP_IDS`` is used.
    """

    # Default stop token IDs.
    # NOTE(review): presumably the StableLM chat-role special tokens plus
    # EOS/padding IDs -- confirm against the tokenizer vocabulary.
    DEFAULT_STOP_IDS = (50278, 50279, 50277, 1, 0)

    def __init__(self, stop_ids=None):
        super().__init__()
        if stop_ids is None:
            stop_ids = self.DEFAULT_STOP_IDS
        # A frozenset gives O(1) membership tests and guards against
        # accidental mutation between generation steps.
        self.stop_ids = frozenset(int(token_id) for token_id in stop_ids)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the last generated token is one of the stop IDs.

        Args:
            input_ids: Token IDs generated so far; indexed as
                ``input_ids[0][-1]`` to read the newest token.
            scores: Unused; accepted to satisfy the stopping-criteria call
                signature.
            **kwargs: Unused extra arguments.

        Returns:
            True if generation should stop, False otherwise.
        """
        # Read the newest token once as a Python int instead of comparing the
        # tensor against every stop ID in turn.
        last_token = input_ids[0][-1].item()
        return last_token in self.stop_ids
|
|
|
# System preamble prepended to every conversation. It opens with the
# <|SYSTEM|> marker and lists the assistant's behavioral guidelines, one per
# line, ending with a single trailing newline.
system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""
|
|
|
# Build a single-turn conversation: system preamble, one user message, and an
# open assistant turn for the model to complete.
prompt = f"{system_prompt}<|USER|>What's your mood today?<|ASSISTANT|>"

# Tokenize into PyTorch tensors and place them on the CUDA device.
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Sample up to 64 new tokens at temperature 0.7, ending early as soon as the
# stopping criterion fires.
_stopping_criteria = StoppingCriteriaList([StopOnTokens()])
tokens = model.generate(
    **inputs,
    max_new_tokens=64,
    temperature=0.7,
    do_sample=True,
    stopping_criteria=_stopping_criteria,
)

# Decode the first returned sequence, dropping special tokens, and print it.
_decoded = tokenizer.decode(tokens[0], skip_special_tokens=True)
print(_decoded)