from transformers import AutoModelForCausalLM, AutoTokenizer
import modin.pandas as pd
import gradio as gr

# Single source of truth for the Hugging Face Hub checkpoint, so the tokenizer
# and the model weights are always loaded from the same repository.
_CHECKPOINT = "KoboldAI/OPT-2.7B-Nerybus-Mix"

# Both objects are created once at import time and reused by `chat` on every
# request.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)

def chat(Prompt):
    """Generate text from ``Prompt`` using the module-level model.

    Args:
        Prompt: The text entered by the user. The capitalised name is kept
            as-is because it is part of this function's interface.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens stripped.
    """
    # Keep the whole encoding (not just the ids) so the attention mask can be
    # handed to generate() explicitly instead of being inferred.
    encoded = tokenizer(Prompt, return_tensors="pt")

    generated_ids = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        # NOTE: max_length is the total sequence length (prompt + new tokens),
        # so a long prompt leaves correspondingly less room for a reply.
        max_length=256,
        repetition_penalty=2.5,
        # top_k only takes effect in sampling mode; without do_sample=True the
        # model decodes greedily and the top_k setting is silently ignored.
        do_sample=True,
        top_k=75,
    )
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    
# Text shown by the Gradio page: `title` is the heading, `article` is the
# descriptive blurb passed to the interface.
title = 'Nerybus ChatBot'
article = 'This is an experimental model containing a parameter-wise 50/50 blend (weighted average) of the weights of NerysV2-2.7B and ErebusV1-2.7B Preliminary testing produces pretty coherent outputs, it appears to retain the NSFWness of Erebus but with a Nerys-esque twist in terms of prose.'

# `title` and `article` must be passed by keyword: positional arguments are
# not allowed after keyword arguments (the original call was a SyntaxError).
demo = gr.Interface(
    fn=chat,
    inputs='text',
    outputs='text',
    title=title,
    article=article,
)

# `max_size` is the queue-length limit accepted by `queue()`; there is no
# `max_queue` keyword. queue() returns the interface, so launch() can chain.
demo.queue(max_size=10).launch(max_threads=40, debug=True)