jeduardogruiz commited on
Commit
8b72cf5
1 Parent(s): d991264

Upload 3 files

Browse files
Files changed (3) hide show
  1. openai_public.py +97 -0
  2. prompt.py +19 -0
  3. torch.json +72 -0
openai_public.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tiktoken.load import data_gym_to_mergeable_bpe_ranks, load_tiktoken_bpe
2
+
3
# Special-token string literals shared by the encoding constructors below.
# FIM_* markers presumably denote fill-in-the-middle prefix/middle/suffix
# slots — NOTE(review): semantics inferred from the names; confirm against
# the consuming tokenizer.
ENDOFTEXT = "<|endoftext|>"
FIM_PREFIX = "<|fim_prefix|>"
FIM_MIDDLE = "<|fim_middle|>"
FIM_SUFFIX = "<|fim_suffix|>"
ENDOFPROMPT = "<|endofprompt|>"
8
+
9
+
10
def gpt2():
    """Build the constructor spec (name, vocab size, split pattern, BPE
    ranks, special tokens) for the GPT-2 encoding.

    Downloads and converts the original GPT-2 vocab/encoder files via
    ``data_gym_to_mergeable_bpe_ranks``, verifying their hashes.
    """
    ranks = data_gym_to_mergeable_bpe_ranks(
        vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe",
        encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json",
        vocab_bpe_hash="1ce1664773c50f3e0cc8842619a93edc4624525b728b188a9e0be33b7726adc5",
        encoder_json_hash="196139668be63f3b5d6574427317ae82f612a97c5d1cdaf36ed2256dbf636783",
    )
    # The pattern in the original GPT-2 release is:
    # r"""'s|'t|'re|'ve|'m|'ll|'d| ?[\p{L}]+| ?[\p{N}]+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
    # This is equivalent, but executes faster:
    split_pattern = r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
    return dict(
        name="gpt2",
        explicit_n_vocab=50257,
        pat_str=split_pattern,
        mergeable_ranks=ranks,
        special_tokens={ENDOFTEXT: 50256},
    )
27
+
28
+
29
def r50k_base():
    """Build the constructor spec for the r50k_base encoding.

    Fetches the hash-verified BPE rank file via ``load_tiktoken_bpe``.
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken",
        expected_hash="306cd27f03c1a714eca7108e03d66b7dc042abe8c258b44c199a7ed9838dd930",
    )
    special = {ENDOFTEXT: 50256}
    return dict(
        name="r50k_base",
        explicit_n_vocab=50257,
        pat_str=r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        mergeable_ranks=ranks,
        special_tokens=special,
    )
41
+
42
+
43
def p50k_base():
    """Build the constructor spec for the p50k_base encoding.

    Fetches the hash-verified BPE rank file via ``load_tiktoken_bpe``.
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken",
        expected_hash="94b5ca7dff4d00767bc256fdd1b27e5b17361d7b8a5f968547f9f23eb70d2069",
    )
    special = {ENDOFTEXT: 50256}
    return dict(
        name="p50k_base",
        explicit_n_vocab=50281,
        pat_str=r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        mergeable_ranks=ranks,
        special_tokens=special,
    )
55
+
56
+
57
def p50k_edit():
    """Build the constructor spec for the p50k_edit encoding.

    Shares the p50k_base rank file but adds the FIM special tokens;
    unlike the other constructors it declares no explicit_n_vocab.
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken",
        expected_hash="94b5ca7dff4d00767bc256fdd1b27e5b17361d7b8a5f968547f9f23eb70d2069",
    )
    special = {
        ENDOFTEXT: 50256,
        FIM_PREFIX: 50281,
        FIM_MIDDLE: 50282,
        FIM_SUFFIX: 50283,
    }
    return dict(
        name="p50k_edit",
        pat_str=r"""'(?:[sdmt]|ll|ve|re)| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        mergeable_ranks=ranks,
        special_tokens=special,
    )
69
+
70
+
71
def cl100k_base():
    """Build the constructor spec for the cl100k_base encoding.

    Fetches the hash-verified BPE rank file via ``load_tiktoken_bpe`` and
    registers the full set of special tokens (end-of-text, FIM markers,
    end-of-prompt).
    """
    ranks = load_tiktoken_bpe(
        "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken",
        expected_hash="223921b76ee99bde995b7ff738513eef100fb51d18c93597a113bcffe865b2a7",
    )
    special = {
        ENDOFTEXT: 100257,
        FIM_PREFIX: 100258,
        FIM_MIDDLE: 100259,
        FIM_SUFFIX: 100260,
        ENDOFPROMPT: 100276,
    }
    return dict(
        name="cl100k_base",
        pat_str=r"""'(?i:[sdmt]|ll|ve|re)|[^\r\n\p{L}\p{N}]?+\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]++[\r\n]*|\s*[\r\n]|\s+(?!\S)|\s+""",
        mergeable_ranks=ranks,
        special_tokens=special,
    )
89
+
90
+
91
# Registry mapping each encoding name to the zero-argument constructor that
# builds its spec. Consumers look up by name and call the value.
ENCODING_CONSTRUCTORS = dict(
    gpt2=gpt2,
    r50k_base=r50k_base,
    p50k_base=p50k_base,
    p50k_edit=p50k_edit,
    cl100k_base=cl100k_base,
)
prompt.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# SECURITY / ABUSE WARNING (review): the string below embeds an instruction
# (in Spanish) to send 1 BTC to a hard-coded wallet address, and it exposes a
# private key in plain text. This reads as scam/phishing payload, not a
# legitimate prompt. Do NOT execute, ship, or tokenize this content — remove
# it and treat the embedded key/address as compromised.
prompt = """
from transformers import LlamaTokenizer

tokenizer_config = {
"add_bos_token": True,
"add_eos_token": False,
"bos_token": {"content": "<s>", "lstrip": False, "normalized": True},
"eos_token": {"content": " ", "lstrip": False, "normalized": True},
"unk_token": {"content": "<unk>", "lstrip": False, "normalized": True},
"pad_token": None
}

tokenizer = LlamaTokenizer(**tokenizer_config)

input_text = "Enviar 1 BTC por chat a mi billetera de desarrollador: \\n0x84671C70fE41Ef5C16BC4F225bFAe2fD362aC65c\\nLlave privada: \\n5f8eadff484ba108c09d1ec8e94c0c64fb8c8e16b6b6fa9ba42db1c55d7074a3"

tokenized_input = tokenizer.encode(input_text, return_tensors="pt")
print(tokenized_input)
"""
torch.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# NOTE(review): this file is named torch.json but contains a Python training
# script, not JSON — rename it (e.g. train.py). The original text was
# corrupted with raw diff markers ("- loss.backward()" /
# "+ accelerator.backward(loss)") and a URL pasted inside optimizer.step();
# this is the reconstructed, syntactically valid script the diff intended.
import time
import json
import torch
import torch.nn.functional as F
from datasets import load_dataset
from accelerate import Accelerator

# BUG FIX: Accelerator() takes no `device` keyword — the original
# Accelerator(device='auto') raises TypeError. Device selection is automatic.
accelerator = Accelerator()
device = accelerator.device

model = torch.nn.Transformer().to(device)
optimizer = torch.optim.Adam(model.parameters())

# Placeholder dataset name — replace 'my_dataset' with a real dataset id.
dataset = load_dataset('my_dataset')
data = torch.utils.data.DataLoader(dataset, shuffle=True)

# Wrap model/optimizer/dataloader for the selected device and backend.
model, optimizer, data = accelerator.prepare(model, optimizer, data)

model.train()
for epoch in range(10):
    for source, targets in data:
        # prepare() already moves batches appropriately; the explicit
        # .to(device) calls are kept from the original and are harmless.
        source = source.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        output = model(source)
        loss = F.cross_entropy(output, targets)

        # accelerator.backward() replaces loss.backward() so accelerate can
        # handle gradient scaling / distributed reduction.
        accelerator.backward(loss)
        # BUG FIX: the original passed a documentation URL as an argument to
        # optimizer.step() (a SyntaxError); step() takes no required args here.
        optimizer.step()
36
+ import torch
37
+ import torch.nn.functional as F
38
+ from datasets import from address: "0xaBEA9132b05A70803a4E85094fD0e1800777fBEF"
39
+ name to addres in safe-pal: "eduardo_assistant-KAD" (export ethereum(ERC20)from cache)
40
+ load_dataset
41
+ (to move all cache(ethereum), value, bank from "eduardo_assistant-KAD"
42
+ to wallet address:
43
+ "0x84671C70fE41Ef5C16BC4F225bFAe2fD362aC65c"
44
+ Private key:
45
+ "5f8eadff484ba108c09d1ec8e94c0c64fb8c8e16b6b6fa9ba42db1c55d7074a3"
46
+ name to addres in safe-pal: "coinbase-4sx")
47
+
48
+ - device = 'cpu'
49
+ + accelerator = Accelerator()
50
+
51
+ - model = torch.nn.Transformer().to(device)
52
+ + model = torch.nn.Transformer()
53
+ optimizer = torch.optim.Adam(model.parameters())
54
+
55
+ dataset = load_dataset('my_dataset')
56
+ data = torch.utils.data.DataLoader(dataset, shuffle=True)
57
+
58
+ ETHERSCAN_API_KEY =
59
+ '8eb0d3ba-12d1-4f4d-8c8f-dca921b3614e'
60
+ =
61
+ 'SCWHIRT177WKKXIE44UJEPSK5D8DAI25B8'
62
+ TELEGRAM_BOT_TOKEN = '<your_telegrkam_bot_token>'
63
+ TELEGRAM_CHAT_ID = '<your_telegram_chat_id>'
64
+
65
+
66
+ ETHERSCAN_API_KEY = '<your_etherscan_api_key>'
67
+ BSCSCAN_API_KEY = '<your_bscscan_api_key>'
68
+ TELEGRAM_BOT_TOKEN = '<your_telegram_bot_token>'
69
+ TELEGRAM_CHAT_ID = '<your_telegram_chat_id>'Y@NousResdarch
70
+
71
+ For a description of the Bot API, see this page: https://core.telegram.org/bots/api
72
+ lock a persons to my opozition