# First commit inspiration:
# https://huggingface.co/spaces/lambeth-dai/Light-PDF-Web-QA-Chatbot/blob/main/app.py
# ---------------------
# model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral',
#                                              model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config))
# ---------------------
import gradio as gr
from ctransformers import AutoModelForCausalLM, AutoConfig, Config
import datetime

# Generation defaults (also shown in the parameter sliders below)
i_temperature = 0.30
i_max_new_tokens = 1100
repo = 'TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF'
model_file = "tinyllama-1.1b-1t-openorca.Q4_K_M.gguf"
i_repetitionpenalty = 1.2
i_contextlength = 12048  # note: the title below advertises a 4K context window
logfile = 'TinyLlamaOpenOrca1.1B-stream.txt'
print("loading model...")
stt = datetime.datetime.now()
conf = AutoConfig(Config(temperature=i_temperature, repetition_penalty=i_repetitionpenalty, batch_size=64,
max_new_tokens=i_max_new_tokens, context_length=i_contextlength))
llm = AutoModelForCausalLM.from_pretrained(repo, model_file=model_file,
model_type="llama",config = conf)
dt = datetime.datetime.now() - stt
print(f"Model loaded in {dt}")
# MODEL SETTINGS, also used for DISPLAY
im_user = 'https://github.com/fabiomatricardi/TiniLlamaGradioChat/raw/main/456322.webp'
im_bot = 'https://github.com/fabiomatricardi/TiniLlamaGradioChat/raw/main/TinyLlama_logo.png'

def writehistory(text):
    # Append one chat turn to the log file; the `with` block closes the
    # file automatically, so no explicit close() is needed
    with open(logfile, 'a', encoding='utf-8') as f:
        f.write(text)
        f.write('\n')
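# A possible explanation for the log issue noted in the UI text below (an
# assumption, not verified): the filesystem of a Hugging Face Space container
# is ephemeral, so a plain text file written at runtime is lost on restart.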
with gr.Blocks(theme='ParityError/Interstellar') as demo:
    # TITLE SECTION
    with gr.Row():
        with gr.Column(scale=12):
            gr.HTML("<center>"
                    + "<h1>πŸ¦™ TinyLlama 1.1B πŸ‹ OpenOrca 4K context window</h1></center>")
            gr.Markdown("""
**Currently Running**: [tinyllama-1.1b-1t-openorca.Q4_K_M.gguf](https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF) &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; **Chat History Log File**: *TinyLlamaOpenOrca1.1B-stream.txt*
- **Base Model**: PY007/TinyLlama-1.1B-intermediate-step-480k-1T, fine-tuned on the OpenOrca GPT-4 subset for 1 epoch, using the ChatML format.
- **License**: Apache 2.0, following the TinyLlama base model. The model output is not censored and the authors do not endorse the opinions in the generated content. Use at your own risk.
- **Notes**: this is my first commit, and the chat does not yet take the conversation history into account. **Note 2**: the TXT log file is not working yet either.
""")
        gr.Image(value=im_bot, width=80)
    # chat and parameter settings
    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(height=350, show_copy_button=True,
                                 avatar_images=[im_user, im_bot])
            with gr.Row():
                with gr.Column(scale=14):
                    msg = gr.Textbox(show_label=False,
                                     placeholder="Enter text",
                                     lines=2)
                submitBtn = gr.Button("\nπŸ’¬ Send\n", size="lg", variant="primary", min_width=180)
        with gr.Column(min_width=50, scale=2):
            with gr.Tab(label="Parameter Setting"):
                gr.Markdown("# Parameters")
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    interactive=True,
                    label="Top-p",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.30,
                    step=0.01,
                    interactive=True,
                    label="Temperature",
                )
                max_length_tokens = gr.Slider(
                    minimum=0,
                    maximum=4096,
                    value=1060,
                    step=4,
                    interactive=True,
                    label="Max Generation Tokens",
                )
                rep_pen = gr.Slider(
                    minimum=0,
                    maximum=5,
                    value=1.2,
                    step=0.05,
                    interactive=True,
                    label="Repetition Penalty",
                )
                clear = gr.Button("πŸ—‘οΈ Clear All Messages", variant='secondary')
    def user(user_message, history):
        # Log the user turn, clear the textbox, and append the message to the
        # history with a pending (None) bot reply
        writehistory(f"USER: {user_message}")
        return "", history + [[user_message, None]]
    def bot(history, t, p, m, r):
        SYSTEM_PROMPT = """<|im_start|>system
You are a helpful bot. Your answers are clear and concise.
<|im_end|>
"""
        # Build the ChatML prompt from the latest user message; the system
        # message defined above was previously left out of the prompt
        prompt = SYSTEM_PROMPT + f"<|im_start|>user\n{history[-1][0]}<|im_end|>\n<|im_start|>assistant\n"
        print(f"history length: {len(history)}")
        if len(history) == 1:
            print("this is the first round")
        else:
            print("here we should pass more conversations")
        history[-1][1] = ""
        # Stream tokens from the model and update the last chat turn as they arrive
        for character in llm(prompt,
                             temperature=t,
                             top_p=p,
                             repetition_penalty=r,
                             max_new_tokens=m,
                             stop=['<|im_end|>'],
                             stream=True):
            history[-1][1] += character
            yield history
        writehistory(f"temperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history}\n\n")
        # Log the messages in the terminal
        print(f"USER: {history[-1][0]}\n---\ntemperature: {t}, top_p: {p}, maxNewTokens: {m}, repetitionPenalty: {r}\n---\nBOT: {history[-1][1]}\n\n")
    # Clicking submitBtn first runs user() to display the message, then bot()
    # with the generation parameters taken from the sliders
    submitBtn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature, top_p, max_length_tokens, rep_pen], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()  # required to yield the streams from the text generation
demo.launch(inbrowser=True)
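
# To run locally (a sketch, assuming the dependencies are available):
#   pip install gradio ctransformers
#   python app.py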