"""Gradio playground for a local GGUF model served through llama-cpp-python.

Loads the model once at start-up, then exposes a small web UI where a prompt
and a few sampling parameters are sent to the model and the answer is streamed
back together with timing and token-count statistics.
"""

import datetime

import gradio as gr
from llama_cpp import Llama

convHistory = ''
modelfile = "TrillaTag-0.0.3_V2.gguf"
contextlength = 128

# An empty string is a substring of every text, so using "" as a stop sequence
# would presumably end generation immediately with no output.
# NOTE(review): "</s>" is assumed to be the intended end-of-sequence marker
# for the [INST]...[/INST] template used below -- confirm against the model.
STOP_SEQUENCES = ["</s>"]

print("loading model...")
stt = datetime.datetime.now()
llm = Llama(
    model_path=modelfile,
    n_ctx=contextlength,
)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")


def _count_tokens(text):
    """Return how many tokens the loaded model's tokenizer produces for *text*."""
    return len(llm.tokenize(text.encode("utf-8")))


def combine(prompt, temperature, max_new_tokens, top_p, repeat_penalty):
    """Stream a completion for *prompt* along with timing and token statistics.

    This is a generator: after every streamed chunk it yields the text
    accumulated so far plus refreshed statistics, in the order expected by the
    ``outputs`` list wired to the "Generate" button.

    Args:
        prompt: Raw user prompt; it is wrapped in ``[INST]...[/INST]`` here.
        temperature: Sampling temperature forwarded to the model.
        max_new_tokens: Upper bound on generated tokens (``max_tokens``).
        top_p: Nucleus-sampling threshold forwarded to the model.
        repeat_penalty: Repetition penalty forwarded to the model.

    Yields:
        Tuples ``(generation, delta, prompt_tokens, answer_tokens,
        total_tokens)`` where ``generation`` is the text so far, ``delta`` is
        the elapsed ``datetime.timedelta`` and the remaining items are
        human-readable token-count strings.

    Returns:
        The last tuple of the same shape (carried by ``StopIteration``).
    """
    prompt = f"[INST]{prompt}[/INST]"
    start = datetime.datetime.now()

    # The prompt does not change while streaming, so tokenize it only once.
    prompt_token_count = _count_tokens(prompt)
    prompt_tokens = f"Prompt Tokens: {prompt_token_count}"

    # Initialise everything the final ``return`` uses, so an empty stream
    # cannot leave these names unbound.
    generation = ""
    answer_tokens = ""
    total_tokens = ""
    delta = datetime.timedelta(0)

    stream = llm(
        prompt,
        max_tokens=max_new_tokens,
        stop=STOP_SEQUENCES,
        temperature=temperature,
        repeat_penalty=repeat_penalty,
        top_p=top_p,
        echo=False,
        stream=True,
    )
    for chunk in stream:
        generation += chunk["choices"][0]["text"]
        # Count the generated tokens once per chunk and reuse the number.
        answer_token_count = _count_tokens(generation)
        answer_tokens = f"Out Tkns: {answer_token_count}"
        total_tokens = f"Total Tkns: {prompt_token_count + answer_token_count}"
        delta = datetime.datetime.now() - start
        yield generation, delta, prompt_tokens, answer_tokens, total_tokens

    # Only reads the module-level history, so no ``global`` declaration is
    # needed. Nothing in the visible code updates it.
    print(convHistory)
    return generation, delta, prompt_tokens, answer_tokens, total_tokens


# MAIN GRADIO INTERFACE
# Alternative themes: gr.themes.Glass(), 'remilia/Ghostly'
with gr.Blocks(theme='Medguy/base2') as demo:
    # TITLE SECTION: read-only statistics boxes filled in by ``combine``.
    with gr.Row(variant='compact'):
        with gr.Column(scale=10):
            with gr.Row():
                with gr.Column(min_width=80):
                    gentime = gr.Textbox(
                        value="",
                        placeholder="Generation Time:",
                        min_width=50,
                        show_label=False,
                    )
                with gr.Column(min_width=80):
                    prompttokens = gr.Textbox(
                        value="",
                        placeholder="Prompt Tkn:",
                        min_width=50,
                        show_label=False,
                    )
                with gr.Column(min_width=80):
                    outputokens = gr.Textbox(
                        value="",
                        placeholder="Output Tkn:",
                        min_width=50,
                        show_label=False,
                    )
                with gr.Column(min_width=80):
                    totaltokens = gr.Textbox(
                        value="",
                        placeholder="Total Tokens:",
                        min_width=50,
                        show_label=False,
                    )

    # INTERACTIVE INFOGRAPHIC SECTION

    # PLAYGROUND INTERFACE SECTION
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(" ### Tunning Parameters")
            temp = gr.Slider(
                label="Temperature",
                minimum=0.0, maximum=1.0, step=0.01, value=0.15,
            )
            top_p = gr.Slider(
                label="Top_P",
                minimum=0.0, maximum=1.0, step=0.01, value=0.15,
            )
            repPen = gr.Slider(
                label="Repetition Penalty",
                minimum=0.0, maximum=4.0, step=0.01, value=1,
            )
            max_len = gr.Slider(
                label="Maximum output length",
                minimum=10, maximum=contextlength, step=2, value=20,
            )
            btn = gr.Button(value="Generate", variant='primary')

        with gr.Column(scale=4):
            prompt = gr.Textbox(
                label="User Prompt", lines=6, show_copy_button=True,
            )
            output = gr.Textbox(
                value="", label="Output", lines=12, show_copy_button=True,
            )
            # The order of ``outputs`` must match the tuples yielded by
            # ``combine``.
            btn.click(
                combine,
                inputs=[prompt, temp, max_len, top_p, repPen],
                outputs=[output, gentime, prompttokens, outputokens, totaltokens],
            )


if __name__ == "__main__":
    demo.launch(inbrowser=True)