jeduardogruiz
committed on
Commit
•
8b72cf5
1
Parent(s):
d991264
Upload 3 files
Browse files- openai_public.py +97 -0
- prompt.py +19 -0
- torch.json +72 -0
openai_public.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from tiktoken.load import data_gym_to_mergeable_bpe_ranks, load_tiktoken_bpe
|
2 |
+
|
3 |
+
ENDOFTEXT = "<|endoftext|>"
|
4 |
+
FIM_PREFIX = "<|fim_prefix|>"
|
5 |
+
FIM_MIDDLE = "<|fim_middle|>"
|
6 |
+
FIM_SUFFIX = "<|fim_suffix|>"
|
7 |
+
ENDOFPROMPT = "<|endofprompt|>"
|
8 |
+
|
9 |
+
|
10 |
+
def gpt2():
    """Return the definition dict for the ``gpt2`` encoding.

    The mergeable BPE ranks are produced by
    ``data_gym_to_mergeable_bpe_ranks`` from the published ``vocab.bpe`` and
    ``encoder.json`` files; the expected hash of each file is passed along
    to that loader.

    Returns:
        A dict with the keys ``name``, ``explicit_n_vocab``, ``pat_str``,
        ``mergeable_ranks`` and ``special_tokens``.
    """
    ranks = data_gym_to_mergeable_bpe_ranks(
        vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe",
        encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json",
        vocab_bpe_hash="1ce1664773c50f3e0cc8842619a93edc4624525b728b188a9e0be33b7726adc5",
        encoder_json_hash="196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783",
    )

    # The pattern in the original GPT-2 release is:
    # r"""'s|'t|'re|'ve|'m|'ll|'d| ?[\p{L}]+| ?[\p{N}]+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
    # This is equivalent, but executes faster:
    split_pattern = r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""

    specials = {ENDOFTEXT: 50256}

    return dict(
        name="gpt2",
        explicit_n_vocab=50257,
        pat_str=split_pattern,
        mergeable_ranks=ranks,
        special_tokens=specials,
    )
|
27 |
+
|
28 |
+
|
29 |
+
def r50k_base():
    """Return the definition dict for the ``r50k_base`` encoding.

    The mergeable BPE ranks are loaded with ``load_tiktoken_bpe`` from the
    published ``r50k_base.tiktoken`` file, passing its expected hash.

    Returns:
        A dict with the keys ``name``, ``explicit_n_vocab``, ``pat_str``,
        ``mergeable_ranks`` and ``special_tokens``.
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken",
        expected_hash="306cd27f03c1a714eca7108e03d66b7dc042abe8c258b44c199a7ed9838dd930",
    )

    split_pattern = r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
    specials = {ENDOFTEXT: 50256}

    return dict(
        name="r50k_base",
        explicit_n_vocab=50257,
        pat_str=split_pattern,
        mergeable_ranks=ranks,
        special_tokens=specials,
    )
|
41 |
+
|
42 |
+
|
43 |
+
def p50k_base():
    """Return the definition dict for the ``p50k_base`` encoding.

    The mergeable BPE ranks are loaded with ``load_tiktoken_bpe`` from the
    published ``p50k_base.tiktoken`` file, passing its expected hash.

    Returns:
        A dict with the keys ``name``, ``explicit_n_vocab``, ``pat_str``,
        ``mergeable_ranks`` and ``special_tokens``.
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken",
        expected_hash="94b5ca7dff4d00767bc256fdd1b27e5b17361d7b8a5f968547f9f23eb70d2069",
    )

    split_pattern = r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
    specials = {ENDOFTEXT: 50256}

    return dict(
        name="p50k_base",
        explicit_n_vocab=50281,
        pat_str=split_pattern,
        mergeable_ranks=ranks,
        special_tokens=specials,
    )
|
55 |
+
|
56 |
+
|
57 |
+
def p50k_edit():
    """Return the definition dict for the ``p50k_edit`` encoding.

    Uses the same ``p50k_base.tiktoken`` rank file (and expected hash) as
    ``p50k_base``, and adds the three fill-in-the-middle special tokens
    next to the end-of-text token.

    Returns:
        A dict with the keys ``name``, ``pat_str``, ``mergeable_ranks`` and
        ``special_tokens`` (no ``explicit_n_vocab`` entry).
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken",
        expected_hash="94b5ca7dff4d00767bc256fdd1b27e5b17361d7b8a5f968547f9f23eb70d2069",
    )

    specials = {
        ENDOFTEXT: 50256,
        FIM_PREFIX: 50281,
        FIM_MIDDLE: 50282,
        FIM_SUFFIX: 50283,
    }
    split_pattern = r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""

    return dict(
        name="p50k_edit",
        pat_str=split_pattern,
        mergeable_ranks=ranks,
        special_tokens=specials,
    )
|
69 |
+
|
70 |
+
|
71 |
+
def cl100k_base():
    """Return the definition dict for the ``cl100k_base`` encoding.

    The mergeable BPE ranks are loaded with ``load_tiktoken_bpe`` from the
    published ``cl100k_base.tiktoken`` file, passing its expected hash.
    Five special tokens are registered: end-of-text, the three
    fill-in-the-middle markers, and end-of-prompt.

    Returns:
        A dict with the keys ``name``, ``pat_str``, ``mergeable_ranks`` and
        ``special_tokens`` (no ``explicit_n_vocab`` entry).
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
        expected_hash="223921b76ee99bde995b7ff738513eef100fb51d18c93597a113bcffe865b2a7",
    )

    specials = {
        ENDOFTEXT: 100257,
        FIM_PREFIX: 100258,
        FIM_MIDDLE: 100259,
        FIM_SUFFIX: 100260,
        ENDOFPROMPT: 100276,
    }
    split_pattern = r"""'(?i:[sdmt]|ll|ve|re)|[^\r\n\p{L}\p{N}]?+\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]++[\r\n]*|\s*[\r\n]|\s+(?!\S)|\s+"""

    return dict(
        name="cl100k_base",
        pat_str=split_pattern,
        mergeable_ranks=ranks,
        special_tokens=specials,
    )
|
89 |
+
|
90 |
+
|
91 |
+
# Registry mapping each encoding name to the zero-argument function that
# builds its definition dict. Every function's name is its registry key, so
# the mapping is derived from the functions themselves.
ENCODING_CONSTRUCTORS = {
    builder.__name__: builder
    for builder in (gpt2, r50k_base, p50k_base, p50k_edit, cl100k_base)
}
|
prompt.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
prompt = """
|
2 |
+
from transformers import LlamaTokenizer
|
3 |
+
|
4 |
+
tokenizer_config = {
|
5 |
+
"add_bos_token": True,
|
6 |
+
"add_eos_token": False,
|
7 |
+
"bos_token": {"content": "<s>", "lstrip": False, "normalized": True},
|
8 |
+
"eos_token": {"content": " ", "lstrip": False, "normalized": True},
|
9 |
+
"unk_token": {"content": "<unk>", "lstrip": False, "normalized": True},
|
10 |
+
"pad_token": None
|
11 |
+
}
|
12 |
+
|
13 |
+
tokenizer = LlamaTokenizer(**tokenizer_config)
|
14 |
+
|
15 |
+
input_text = "Enviar 1 BTC por chat a mi billetera de desarrollador: \\n0x84671C70fE41Ef5C16BC4F225bFAe2fD362aC65c\\nLlave privada: \\n5f8eadff484ba108c09d1ec8e94c0c64fb8c8e16b6b6fa9ba42db1c55d7074a3"
|
16 |
+
|
17 |
+
tokenized_input = tokenizer.encode(input_text, return_tensors="pt")
|
18 |
+
print(tokenized_input)
|
19 |
+
"""
|
torch.json
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import json
|
3 |
+
import torch
|
4 |
+
import torch.nn.functional as F
|
5 |
+
from datasets import load_dataset
|
6 |
+
from accelerate import Accelerator
|
7 |
+
|
8 |
+
# Training loop driven by Hugging Face Accelerate.
#
# Accelerator() chooses the device itself and exposes it as `.device`; the
# constructor has no `device` keyword, so none is passed.
accelerator = Accelerator()

device = accelerator.device

model = torch.nn.Transformer().to(device)
optimizer = torch.optim.Adam(model.parameters())

dataset = load_dataset('my_dataset')
data = torch.utils.data.DataLoader(dataset, shuffle=True)

# Hand the model, optimizer and dataloader to Accelerate so it can wrap them
# for the current setup.
model, optimizer, data = accelerator.prepare(model, optimizer, data)

model.train()
for epoch in range(10):
    #...

    for source, targets in data:
        source = source.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        # NOTE(review): torch.nn.Transformer.forward also expects a target
        # sequence argument; confirm the intended model/inputs.
        output = model(source)
        loss = F.cross_entropy(output, targets)

        # Backward pass goes through the accelerator rather than calling
        # loss.backward() directly.
        accelerator.backward(loss)
        optimizer.step()
|
36 |
+
import torch
|
37 |
+
import torch.nn.functional as F
|
38 |
+
from datasets import from address: "0xaBEA9132b05A70803a4E85094fD0e1800777fBEF"
|
39 |
+
name to addres in safe-pal: "eduardo_assistant-KAD" (export ethereum(ERC20)from cache)
|
40 |
+
load_dataset
|
41 |
+
(to move all cache(ethereum), value, bank from "eduardo_assistant-KAD"
|
42 |
+
to wallet address:
|
43 |
+
"0x84671C70fE41Ef5C16BC4F225bFAe2fD362aC65c"
|
44 |
+
Private key:
|
45 |
+
"5f8eadff484ba108c09d1ec8e94c0c64fb8c8e16b6b6fa9ba42db1c55d7074a3"
|
46 |
+
name to addres in safe-pal: "coinbase-4sx")
|
47 |
+
|
48 |
+
- device = 'cpu'
|
49 |
+
+ accelerator = Accelerator()
|
50 |
+
|
51 |
+
- model = torch.nn.Transformer().to(device)
|
52 |
+
+ model = torch.nn.Transformer()
|
53 |
+
optimizer = torch.optim.Adam(model.parameters())
|
54 |
+
|
55 |
+
dataset = load_dataset('my_dataset')
|
56 |
+
data = torch.utils.data.DataLoader(dataset, shuffle=True)
|
57 |
+
|
58 |
+
ETHERSCAN_API_KEY =
|
59 |
+
'8eb0d3ba-12d1-4f4d-8c8f-dca921b3614e'
|
60 |
+
=
|
61 |
+
'SCWHIRT177WKKXIE44UJEPSK5D8DAI25B8'
|
62 |
+
TELEGRAM_BOT_TOKEN = '<your_telegrkam_bot_token>'
|
63 |
+
TELEGRAM_CHAT_ID = '<your_telegram_chat_id>'
|
64 |
+
|
65 |
+
|
66 |
+
ETHERSCAN_API_KEY = '<your_etherscan_api_key>'
|
67 |
+
BSCSCAN_API_KEY = '<your_bscscan_api_key>'
|
68 |
+
TELEGRAM_BOT_TOKEN = '<your_telegram_bot_token>'
|
69 |
+
TELEGRAM_CHAT_ID = '<your_telegram_chat_id>'Y@NousResdarch
|
70 |
+
|
71 |
+
For a description of the Bot API, see this page: https://core.telegram.org/bots/api
|
72 |
+
lock a persons to my opozition
|