# https://huggingface.co/stabilityai/stablelm-3b-4e1t

import os
import dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

set_seed(42)
cache_dir = ".model.cache"
# NOTE(xiaoke): load the Hugging Face auth token from .env for model downloading.
dotenv.load_dotenv(".env")
use_auth_token = os.getenv("USE_AUTH_TOKEN", False)
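# A hypothetical `.env` for the snippets in this file; `USE_AUTH_TOKEN` is read by the
# `os.getenv` call above, and the value below is a placeholder, not a real token:
#   USE_AUTH_TOKEN=<your-hf-access-token>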

tokenizer = AutoTokenizer.from_pretrained(
    "stabilityai/stablelm-3b-4e1t",
    cache_dir=cache_dir,
    use_auth_token=use_auth_token,
)
model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-3b-4e1t",
    trust_remote_code=True,
    device_map="auto",
    torch_dtype="auto",
    cache_dir=cache_dir,
    use_auth_token=use_auth_token,
)

inputs = tokenizer("The weather is always wonderful", return_tensors="pt").to(model.device)
tokens = model.generate(
    **inputs,
    max_new_tokens=64,
    temperature=0.75,
    top_p=0.95,
    do_sample=True,
)
print(tokenizer.decode(tokens[0], skip_special_tokens=True))
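
# A minimal optional sketch (not from the model card): the same prompt with greedy
# decoding (`do_sample=False`) gives a deterministic completion, handy for comparing runs.
greedy_tokens = model.generate(**inputs, max_new_tokens=64, do_sample=False)
print(tokenizer.decode(greedy_tokens[0], skip_special_tokens=True))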


# https://huggingface.co/stabilityai/stablelm-zephyr-3b
# - the model is fine-tuned with SFT and RLAIF
# - the tokenizer is updated from `GPTNeoXTokenizer`
# - needs the latest version of transformers.

import os
import dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

set_seed(42)
cache_dir = ".model.cache"
# NOTE(xiaoke): load the Hugging Face auth token from .env for model downloading.
dotenv.load_dotenv(".env")
use_auth_token = os.getenv("USE_AUTH_TOKEN", False)

tokenizer = AutoTokenizer.from_pretrained(
    "stabilityai/stablelm-zephyr-3b",
    cache_dir=cache_dir,
    use_auth_token=use_auth_token,
)
model = AutoModelForCausalLM.from_pretrained(
    "stabilityai/stablelm-zephyr-3b",
    trust_remote_code=True,
    device_map="auto",
    cache_dir=cache_dir,
    use_auth_token=use_auth_token,
)

prompt = [{"role": "user", "content": 'List 3 synonyms for the word "tiny"'}]
inputs = tokenizer.apply_chat_template(prompt, add_generation_prompt=True, return_tensors="pt")

tokens = model.generate(inputs.to(model.device), max_new_tokens=1024, temperature=0.8, do_sample=True)

print(tokenizer.decode(tokens[0], skip_special_tokens=False))
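
# A small sketch to inspect the prompt string the chat template produces;
# `tokenize=False` returns the formatted text instead of token ids.
prompt_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
print(prompt_text)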


# https://huggingface.co/microsoft/phi-2

import os
import dotenv
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

set_seed(42)
cache_dir = ".model.cache"
# NOTE(xiaoke): load the Hugging Face auth token from .env for model downloading.
dotenv.load_dotenv(".env")
use_auth_token = os.getenv("USE_AUTH_TOKEN", False)


tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/phi-2",
    trust_remote_code=True,
    cache_dir=cache_dir,
    use_auth_token=use_auth_token,
)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    torch_dtype="auto",
    trust_remote_code=True,
    device_map="auto",
    cache_dir=cache_dir,
    use_auth_token=use_auth_token,
)

inputs = tokenizer(
    '''def print_prime(n):
   """
   Print all primes between 1 and n
   """''',
    return_tensors="pt",
    return_attention_mask=False,
).to(model.device)

outputs = model.generate(**inputs, max_length=200)
text = tokenizer.batch_decode(outputs)[0]
print(text)
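
# A minimal sketch (not from the phi-2 card): print only the generated continuation
# by slicing the prompt tokens off the output sequence before decoding.
new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
print(tokenizer.decode(new_tokens, skip_special_tokens=True))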