---
license: apache-2.0
---

```python
import ctranslate2
import transformers

# Location handed to ctranslate2.Generator for the int8 CTranslate2 model.
# NOTE(review): presumably this must resolve to a local directory containing
# the converted model files (e.g. a local copy of the repository) — confirm.
MODEL_PATH = "Praise2112/Mistral-7B-v0.1-int8-ct2"

# Load the generator on the GPU; swap in the commented line below to run on CPU.
generator = ctranslate2.Generator(MODEL_PATH, device="cuda", compute_type="int8")  # GPU
# generator = ctranslate2.Generator(MODEL_PATH, device="cpu", compute_type="int8")  # CPU

# The tokenizer comes from the original Mistral checkpoint.
tokenizer = transformers.AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")


def sample(prompt_tokens):
    """Generate from a list of token strings and return the decoded text.

    Runs a single-item batch with ``max_length=30`` and ``sampling_topk=10``,
    then decodes the first returned sequence of the first result.
    """
    batch_results = generator.generate_batch(
        [prompt_tokens],
        max_length=30,
        sampling_topk=10,
    )
    return tokenizer.decode(batch_results[0].sequences_ids[0])


# Unconditional generation: start from the BOS token only.
print(sample([tokenizer.bos_token]))

# Conditional generation: encode the prompt, then map the ids back to token strings.
prompt_ids = tokenizer.encode("It is")
print(sample(tokenizer.convert_ids_to_tokens(prompt_ids)))
```