---
datasets:
- CheshireAI/guanaco-unchained
---

## Usage

```python
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb sentencepiece transformers_stream_generator tiktoken

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList

tokenizer = AutoTokenizer.from_pretrained("TinyPixel/qwen-1.8B-guanaco", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "TinyPixel/qwen-1.8B-guanaco",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

device = "cuda:0"

# Stop sequences: token IDs marking the start of a new conversation turn
# ("### Human:" / "### Assistant:"), so generation halts before the model
# starts writing the next turn itself. See the sketch after the Colab link
# for deriving these from the tokenizer.
stop_token_ids = [[14374, 11097, 25], [14374, 21388, 25]]
stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as the most recently generated tokens match any stop sequence.
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

text = '''### Human: what is the difference between a dog and a cat on a biological level?
### Assistant:'''

inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    stopping_criteria=stopping_criteria,
    do_sample=True,
    top_p=0.95,
    temperature=0.7,
    top_k=50,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=False))
```

## Colab notebook

A Colab notebook for running this model: https://colab.research.google.com/drive/1vS5MF2WNXtXMKNDXFua0T43l7HJ51nOW?usp=sharing
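
## Deriving the stop sequences

The hardcoded stop token IDs in the usage snippet presumably correspond to the `### Human:` and `### Assistant:` turn markers of the prompt format. As a minimal sketch (assuming these markers tokenize to the same IDs under this model's tokenizer), they can be derived instead of hardcoded:

```python
# Sketch: build the stop sequences from the marker strings rather than raw IDs.
# Assumption: tokenizer.encode yields the same IDs as the hardcoded lists above;
# the print statement lets you verify before relying on it.
stop_strings = ["### Human:", "### Assistant:"]
stop_token_ids = [torch.LongTensor(tokenizer.encode(s)).to(device) for s in stop_strings]
print(stop_token_ids)  # compare against [[14374, 11097, 25], [14374, 21388, 25]]
```

This keeps the stopping criterion in sync with the prompt format if the turn markers or tokenizer ever change.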