Upload gen.py
gen.py ADDED
@@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-

print('Load libs')
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

os.system('cls||clear')

# Constants
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

def load_model(model_name_or_path):
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path)

    total_params = sum(p.numel() for p in model.parameters())
    # Context window of the model; fall back to the tokenizer's limit if the
    # config does not expose max_position_embeddings
    max_tokens = getattr(model.config, 'max_position_embeddings',
                         tokenizer.model_max_length)
    model = model.to(DEVICE)
    params = {
        'model': model_name_or_path,
        'size': f'{total_params / 10**9:.2f}B',
        'text': max_tokens,
        'device': DEVICE,
    }
    return model, tokenizer, params


def model_log(params):
    lines = [
        f'Model: {params["model"]}',
        f'Model size: {params["size"]}',
        f'Max tokens: {params["text"]}',
        f'Device: {params["device"]}',
    ]
    width = max(len(line) for line in lines)
    border = '#' * (width + 4)
    body = '\n'.join(f'# {line:<{width}} #' for line in lines)
    return f'{border}\n{body}\n{border}'

def encode_ids(text, tokenizer):
    return tokenizer.encode(text, return_tensors='pt').to(DEVICE)

def generate_step_by_step(config, model, tokenizer, file):
    STEP = 4  # tokens appended to the visible text per iteration
    text_input = config['text']
    input_ids = encode_ids(text_input, tokenizer)
    target = config['maxsize'] - len(input_ids[0])
    current_length = len(input_ids[0])
    generated_text = ''
    for _ in range(target):
        output = model.generate(
            input_ids,
            do_sample=config['do_sample'],
            temperature=config['temperature'],
            top_k=config['top_k'],
            top_p=config['top_p'],
            max_length=current_length + STEP,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=config['num_return_sequences'],
        )
        current_length += STEP
        text_output = tokenizer.decode(output[0][:current_length])
        generated_text = text_output[len(text_input):]
        # if generated_text.count('import ') > 2:
        #     generated_text += '\nSorry, but the bot cannot write code\n\n'
        # if '```python' in generated_text:
        #     generated_text += '\nSorry, but the bot cannot write code\n\n'
        if '\n' in generated_text:
            # A full line has been produced: return it as the reply
            return generated_text.split('\n')[0]
        # cleaned_text, found_repeats = remove_repeated_phrases(generated_text)
        # if found_repeats:
        #     return cleaned_text
        # Otherwise persist the partial generation and show progress
        with open(file, 'w', encoding='utf-8') as f2:
            f2.write(text_input + generated_text)

        os.system('cls||clear')
        print((text_input + generated_text).replace('Me: ', 'User: '))

        input_ids = encode_ids(text_output, tokenizer)
    return generated_text

def botAw(text, model, tokenizer, params, file):
    config = {
        'text': text,
        'do_sample': True,
        'temperature': 0.5,
        'top_k': 20,
        'top_p': 0.9,
        'maxsize': params['text'],
        'num_return_sequences': 1,
    }
    return generate_step_by_step(config, model, tokenizer, file)

print('Load GPT')
gpt_model_name = 'ifmain/StableGPT4-Micro-1.6B'
gpt_model, gpt_tokenizer, gpt_params = load_model(gpt_model_name)
print(model_log(gpt_params))
print()


file = 'file.txt'

# Create the conversation file on first run
if not os.path.exists(file):
    open(file, 'w', encoding='UTF-8').close()


def var2():
    while True:
        input('Press Enter to process')
        with open(file, 'r', encoding='UTF-8') as f:
            text = f.read()

        o = botAw(text, gpt_model, gpt_tokenizer, gpt_params, file)

        with open(file, 'w', encoding='UTF-8') as f:
            f.write(text + o)


var2()
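
The script drives the whole exchange through file.txt: each Enter press re-reads the file, generates until the first newline, and appends the result. A minimal sketch of seeding the file before the first run; the Me:/Bot: turn labels are an assumption inferred from the replace('Me: ', 'User: ') call in generate_step_by_step, not a format the script enforces:

# seed_file.py - hypothetical helper, not part of gen.py.
# Writes an initial prompt into file.txt so gen.py has context to continue.
# The 'Me:'/'Bot:' labels are assumed from the replace('Me: ', 'User: ')
# call in generate_step_by_step; any plain-text prompt works.
seed = 'Me: Hi! What can you do?\nBot:'
with open('file.txt', 'w', encoding='utf-8') as f:
    f.write(seed)

Because generate_step_by_step slices off the prompt and returns only the first generated line, ending the seed with 'Bot:' makes that line read as the bot's reply.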