|
import gradio as gr |
|
|
|
|
|
import gradio as gr |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig |
|
|
|
from transformers import GPT2TokenizerFast,GPT2LMHeadModel |
|
# Hub checkpoint used for both the tokenizer and the model weights.
MODEL_NAME = "AlexWortega/instruct_rugptlarge"

tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_NAME)

# Markup tokens registered with the tokenizer as additional special tokens;
# '<instructionS>' is the marker appended to every prompt in generate_seqs.
special_tokens_dict = {
    'additional_special_tokens': [
        '<code>',
        '</code>',
        '<instructionS>',
        '<instructionE>',
        '<next>',
    ]
}
tokenizer.add_special_tokens(special_tokens_dict)

device = 'cpu'

model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)
# Resize the embedding matrix to the tokenizer's current vocabulary size so
# that any token added above has an embedding row.
model.resize_token_embeddings(len(tokenizer))
# Inputs are moved to `device` before generation, so the weights must live
# there too; without this, changing `device` would cause a device mismatch.
model.to(device)
|
|
|
def generate_prompt(instruction, input=None):
    """Return the prompt text for a request.

    When ``input`` is truthy it takes precedence: the result is ``input``
    followed by a colon, and ``instruction`` is not used. Otherwise the
    result is ``instruction`` formatted as a string.
    """
    return f"{input}:" if input else f"{instruction}"
|
|
|
def generate_seqs(q, temp, topp, topk, nb, maxtok):
    """Generate one completion for the instruction text ``q``.

    The prompt is ``q`` followed by the ``<instructionS>`` marker; a missing,
    empty or whitespace-only ``q`` is replaced by a built-in default question.
    The numeric arguments come from UI sliders, so they are coerced to the
    numeric types the generation settings are expected to hold before use.

    Args:
        q: Instruction text entered by the user.
        temp: Sampling temperature.
        topp: Nucleus-sampling probability mass (``top_p``).
        topk: Number of highest-probability tokens to sample from (``top_k``).
        nb: Number of beams; values below 1 are raised to 1.
        maxtok: Maximum number of new tokens; values below 1 are raised to 1.

    Returns:
        The first decoded sequence with ``<instructionS>`` replaced by a
        newline and ``<instructionE>`` / ``<|endoftext|>`` removed.
    """
    # Sliders can hand back 0 for beams, floats for integer settings, or ints
    # for float settings. NOTE(review): transformers is understood to validate
    # these types/ranges strictly -- confirm against the installed version.
    gen_kwargs = {
        "min_length": 20,
        "max_new_tokens": max(1, int(maxtok)),
        "top_k": max(0, int(topk)),
        "top_p": float(topp),
        "do_sample": True,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
        "temperature": float(temp),
        "eos_token_id": tokenizer.eos_token_id,
        # No dedicated pad token is configured here, so EOS doubles as padding.
        "pad_token_id": tokenizer.eos_token_id,
        "use_cache": True,
        "repetition_penalty": 1.5,
        "length_penalty": 0.8,
        "num_beams": max(1, int(nb)),
        "num_return_sequences": 1,
    }

    # Fall back to the default question when nothing usable was typed.
    if not q or not q.strip():
        q = 'Как зарабатывать денег на нейросетях ?'
    prompt = q + '<instructionS>'

    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)
    output_ids = model.generate(input_ids, **gen_kwargs)
    # Special tokens are kept while decoding so the markers can be rewritten
    # explicitly below.
    generated_sequences = tokenizer.batch_decode(
        output_ids, skip_special_tokens=False
    )

    ans = (
        generated_sequences[0]
        .replace('<instructionS>', '\n')
        .replace('<instructionE>', '')
        .replace('<|endoftext|>', '')
    )
    return ans
|
|
|
# HTML snippet passed to the interface as its description (author credits).
description_html = '''
<p>Обучена на 2v100, коллективом авторов:</p>
<ul>
<li><a href="https://t.me/YallenGusev" target="_blank">@YallenGusev</a></li>
<li><a href="https://t.me/lovedeathtransformers" target="_blank">@lovedeathtransformers</a></li>
<li><a href="https://t.me/alexkuk" target="_blank">@alexkuk</a></li>
<li><a href="https://t.me/chckdskeasfsd" target="_blank">@chckdskeasfsd</a></li>
<li><a href="https://t.me/dno5iq" target="_blank">@dno5iq</a></li>
</ul>
'''

# Web UI: one prompt box plus sliders, in the same order as the parameters of
# generate_seqs (q, temp, topp, topk, nb, maxtok).
g = gr.Interface(
    fn=generate_seqs,
    inputs=[
        gr.components.Textbox(
            lines=2,
            label="Впишите сюда задачу, а я попробую решить",
            placeholder="Как зарабатывать денег на нейросетях?",
        ),
        gr.components.Slider(minimum=0.1, maximum=2, value=1.0, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=0.9, label="Top p"),
        # step=1 keeps the value a whole number, since top_k is a token count.
        gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
        # The minimum is 1 because a beam count of 0 is not a usable setting.
        gr.components.Slider(minimum=1, maximum=5, step=1, value=4, label="Beams"),
        gr.components.Slider(
            minimum=1, maximum=256, step=1, value=100, label="Max tokens"
        ),
    ],
    outputs=[
        # Same component namespace as the inputs, replacing the deprecated
        # `gr.inputs` alias that the original output box used.
        gr.components.Textbox(
            lines=5,
            label="Output",
        )
    ],
    title="ruInstructlarge",
    description=description_html,
)

# Enable request queuing before starting the server.
# NOTE(review): `concurrency_count` is a Gradio 3.x argument -- confirm the
# pinned Gradio version still accepts it.
g.queue(concurrency_count=5)
g.launch()