import gradio as gr
from huggingface_hub import InferenceClient
import random
import textwrap
# Define the model to be used
model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(model)
# Embedded system prompt
system_prompt_text = "You are a smart and helpful co-worker at PTT and PTTEP, Thailand-based multinational companies. You help with any kind of request and provide a detailed answer to the question. However, if you are asked about something unethical or dangerous, you must refuse and suggest a safe and respectful way to handle it."
# Read the content of the info.md and info2.md files
with open("info.md", "r") as file:
info_md_content = file.read()
with open("info2.md", "r") as file:
info2_md_content = file.read()
# Chunk the info.md and info2.md content into smaller sections
chunk_size = 1500 # Adjust this size as needed to fit the context window
info_md_chunks = textwrap.wrap(info_md_content, chunk_size)
info2_md_chunks = textwrap.wrap(info2_md_content, chunk_size)
# Combine both sets of chunks
all_chunks = info_md_chunks + info2_md_chunks
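# Only the first two chunks are injected as conversation context (see
# initialize_history below); the remaining chunks are currently unused.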
# Function to initialize the conversation history with chunks
def initialize_history(chunks):
    history = []
    for chunk in chunks:
        history.append(("System Information", chunk))
    return history
# Initialize history with initial chunks
history = initialize_history(all_chunks[:2]) # Starting with the first two chunks for example
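# Mixtral-8x7B-Instruct expects prompts in the instruct format
# "<s>[INST] user text [/INST] model reply</s>", so past turns are replayed
# in that form before the new user message is appended.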
def format_prompt_mixtral(message, history):
    prompt = "<s>"
    prompt += f"{system_prompt_text}\n\n"  # Add the system prompt
    # Include the initial context from the chunks
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    # Add the current user message
    prompt += f"[INST] {message} [/INST]"
    return prompt
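# Streams the model reply token by token so the UI can update as text arrives,
# yielding the in-progress (message, output) pair and finally the full history.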
def chat_inf(message, history, seed, temp, tokens, top_p, rep_p):
    generate_kwargs = dict(
        temperature=temp,
        max_new_tokens=int(tokens),  # sliders may return floats; the API expects integers
        top_p=top_p,
        repetition_penalty=rep_p,
        do_sample=True,
        seed=int(seed),
    )
    formatted_prompt = format_prompt_mixtral(message, history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield [(message, output)]
    history.append((message, output))
    yield history
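# Clears the prompt box and the chat display and resets the shared history
# back to the initial context chunks.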
def clear_fn():
    global history
    history = initialize_history(all_chunks[:2])  # Reset to initial chunks
    return None, None
rand_val = random.randint(1, 1111111111111111)
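# Returns a fresh random seed when "Random Seed" is checked, otherwise the
# value chosen on the seed slider.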
def check_rand(rand, val):
    if rand:
        return random.randint(1, 1111111111111111)
    else:
        return int(val)
with gr.Blocks() as app:
    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference</h3><br><h7>EXPERIMENTAL</h7></center>""")
    with gr.Row():
        chat = gr.Chatbot(height=500)
    with gr.Group():
        with gr.Row():
            with gr.Column(scale=3):
                inp = gr.Textbox(label="Prompt", lines=5, interactive=True)
                with gr.Row():
                    with gr.Column(scale=2):
                        btn = gr.Button("Chat")
                    with gr.Column(scale=1):
                        with gr.Group():
                            stop_btn = gr.Button("Stop")
                            clear_btn = gr.Button("Clear")
            with gr.Column(scale=1):
                with gr.Group():
                    rand = gr.Checkbox(label="Random Seed", value=True)
                    seed = gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, step=1, value=rand_val)
                    tokens = gr.Slider(label="Max new tokens", value=3840, minimum=0, maximum=8000, step=64, interactive=True, visible=True, info="The maximum number of tokens")
                    temp = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                    top_p = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                    rep_p = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)
    hid1 = gr.Number(value=1, visible=False)
    def on_chat(message, chat, seed, temp, tokens, top_p, rep_p):
        # Start each turn from the initial context chunks and stream the
        # partial chatbot state back to the UI as tokens arrive.
        history = initialize_history(all_chunks[:2])
        for response in chat_inf(message, history, seed, temp, tokens, top_p, rep_p):
            yield response
    go = btn.click(on_chat, [inp, chat, seed, temp, tokens, top_p, rep_p], chat)
    stop_btn.click(None, None, None, cancels=[go])
    clear_btn.click(clear_fn, None, [inp, chat])
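# Queue incoming requests (default concurrency limit of 10 per event listener)
# and launch the app with a public share link behind basic username/password auth.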
app.queue(default_concurrency_limit=10).launch(share=True, auth=("admin", "0112358"))