Model Card for benchang1110/tinyllamatw
This is a chat model fine-tuned from TinyLlama/TinyLlama-1.1B-Chat-v1.0. It is an experimental model only; do not use it in production.
How to use
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
if __name__ == '__main__':
    # Interactive multi-turn chat demo for the fine-tuned model.
    # The whole conversation is kept in `messages` and re-rendered through the
    # tokenizer's chat template on every turn, so the model always sees the
    # full history. Typing "restart" clears the history; "exit" (or end of
    # input) leaves the loop.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = AutoModelForCausalLM.from_pretrained('benchang1110/tinyllamatw').to(device)
    tokenizer = AutoTokenizer.from_pretrained("benchang1110/tinyllamatw", use_fast=True)
    # skip_prompt=True: the streamer echoes only the generated reply, not the
    # conversation that was fed in.
    streamer = TextStreamer(tokenizer, skip_prompt=True)
    print('Model loaded. Type "restart" to start new chat. Type "exit" to stop this program')

    messages = []
    while True:
        try:
            content = input('User:\n')
        except EOFError:
            # stdin was closed (e.g. Ctrl-D or piped input ran out); treat it
            # like "exit" instead of crashing with a traceback.
            break
        if content == 'exit':
            break
        if content == 'restart':
            messages = []
            print('Chat restarted')
            continue

        messages.append({'content': content, 'role': 'user'})
        tokenized_chat = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors='pt',
        ).to(device)

        print('Assistant: ')
        outputs = model.generate(
            tokenized_chat,
            temperature=0.3,
            do_sample=True,
            repetition_penalty=1.2,
            streamer=streamer,
            use_cache=True,
        )

        # `outputs[0]` holds the prompt tokens followed by the newly generated
        # ones. Slice the prompt off by length and decode only the reply; this
        # avoids string-matching the rendered template or splitting on the
        # '<|assistant|>' marker, both of which break when the decoded text
        # differs slightly from the template or the reply contains the marker.
        prompt_length = tokenized_chat.shape[-1]
        output = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        messages.append({'content': output, 'role': 'assistant'})
- Downloads last month
- 5