# First Commit
# inspiration: https://huggingface.co/spaces/lambeth-dai/Light-PDF-Web-QA-Chatbot/blob/main/app.py
#---------------------
# model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral',
#                                              model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config))
#---------------------
import gradio as gr
import os
from ctransformers import AutoModelForCausalLM, AutoConfig, Config
import datetime

# GENERATION AND MODEL SETTINGS
i_temperature = 0.30
i_max_new_tokens = 1100
repo = 'TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF'
model_file = "tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
i_repetitionpenalty = 1.2
i_contextlength = 12048
logfile = 'TinyLlamaOpenOrca1.1B-stream.txt'

print("loading model...")
stt = datetime.datetime.now()
conf = AutoConfig(Config(temperature=i_temperature,
                         repetition_penalty=i_repetitionpenalty,
                         batch_size=64,
                         max_new_tokens=i_max_new_tokens,
                         context_length=i_contextlength))
llm = AutoModelForCausalLM.from_pretrained(repo,
                                           model_file=model_file,
                                           model_type="llama",
                                           config=conf)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")

# MODEL SETTINGS also for DISPLAY
im_user = 'https://github.com/fabiomatricardi/TiniLlamaGradioChat/raw/main/456322.webp'
im_bot = 'https://github.com/fabiomatricardi/TiniLlamaGradioChat/raw/main/TinyLlama_logo.png'

def writehistory(text):
    # append one line to the chat-history log file
    # (the `with` block closes the file automatically; no explicit close() needed)
    with open(logfile, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')
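# The OpenOrca fine-tune expects ChatML-formatted prompts (the same format used
# in bot() below). As a quick, optional smoke test of the freshly loaded model,
# something like the following sketch can be uncommented; the prompt wording
# here is only an illustration, not part of the app.
#
# test_prompt = ("<|im_start|>system\nYou are a helpful bot.<|im_end|>\n"
#                "<|im_start|>user\nSay hello.<|im_end|>\n"
#                "<|im_start|>assistant\n")
# print(llm(test_prompt, max_new_tokens=32, stop=['<|im_end|>']))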
" + "

šŸ¦™ TinyLlama 1.1B šŸ‹ OpenOrca 4K context window

") gr.Markdown(""" **Currently Running**: [tinyllama-1.1b-1t-openorca.Q4_K_M.gguf](https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF)        **Chat History Log File**: *TinyLlamaOpenOrca1.1B-stream.txt* - **Base Model**: PY007/TinyLlama-1.1B-intermediate-step-480k-1T, Fine tuned on OpenOrca GPT4 subset for 1 epoch,Using CHATML format. - **License**: Apache 2.0, following the TinyLlama base model. The model output is not censored and the authors do not endorse the opinions in the generated content. Use at your own risk. - **Notes**: this is my first commit. So far the chat is not considering the conversation history. **Note2**: log TXT file is not working too """) gr.Image(value=im_bot, width=80) # chat and parameters settings with gr.Row(): with gr.Column(scale=4): chatbot = gr.Chatbot(height = 350, show_copy_button=True, avatar_images = [im_user,im_bot]) with gr.Row(): with gr.Column(scale=14): msg = gr.Textbox(show_label=False, placeholder="Enter text", lines=2) submitBtn = gr.Button("\nšŸ’¬ Send\n", size="lg", variant="primary", min_width=180) with gr.Column(min_width=50,scale=2): with gr.Tab(label="Parameter Setting"): gr.Markdown("# Parameters") top_p = gr.Slider( minimum=-0, maximum=1.0, value=0.95, step=0.05, interactive=True, label="Top-p", ) temperature = gr.Slider( minimum=0.1, maximum=1.0, value=0.30, step=0.01, interactive=True, label="Temperature", ) max_length_tokens = gr.Slider( minimum=0, maximum=4096, value=1060, step=4, interactive=True, label="Max Generation Tokens", ) rep_pen = gr.Slider( minimum=0, maximum=5, value=1.2, step=0.05, interactive=True, label="Repetition Penalty", ) clear = gr.Button("šŸ—‘ļø Clear All Messages", variant='secondary') def user(user_message, history): writehistory(f"USER: {user_message}") return "", history + [[user_message, None]] def bot(history,t,p,m,r): SYSTEM_PROMPT = """<|im_start|>system You are a helpful bot. Your answers are clear and concise. <|im_end|> """ prompt = f"<|im_start|>system<|im_end|><|im_start|>user\n{history[-1][0]}<|im_end|>\n<|im_start|>assistant\n" print(f"history lenght: {len(history)}") if len(history) == 1: print("this is the first round") else: print("here we should pass more conversations") history[-1][1] = "" for character in llm(prompt, temperature = t, top_p = p, repetition_penalty = r, max_new_tokens=m, stop = ['<|im_end|>'], stream = True): history[-1][1] += character yield history writehistory(f"temperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history}\n\n") #Log in the terminal the messages print(f"USER: {history[-1][0]}\n---\ntemperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history[-1][1]}\n\n") # Clicking the submitBtn will call the generation with Parameters in the slides submitBtn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then( bot, [chatbot,temperature,top_p,max_length_tokens,rep_pen], chatbot ) clear.click(lambda: None, None, chatbot, queue=False) demo.queue() #required to yield the streams from the text generation demo.launch(inbrowser=True)