Mattral committed
Commit d429c0c
1 Parent(s): 166e47c

Update app.py

Files changed (1):
  1. app.py +17 -7
app.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 import random
+import textwrap
 
 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -13,19 +14,28 @@ system_prompt_text = "You are a smart and helpful co-worker of Thailand based mu
 with open("info.md", "r") as file:
     info_md_content = file.read()
 
+# Chunk the info.md content into smaller sections
+chunk_size = 2500  # Adjust this size as needed
+info_md_chunks = textwrap.wrap(info_md_content, chunk_size)
+
+def get_relevant_chunk(prompt, chunks):
+    # For simplicity, we just use the first chunk. You can improve this by adding more sophisticated logic.
+    return chunks[0]
+
 def format_prompt_mixtral(message, history, info_md_content):
     prompt = "<s>"
+    relevant_chunk = get_relevant_chunk(message, info_md_content)
+    prompt += f"{relevant_chunk}\n\n"  # Add the relevant chunk of info.md at the beginning
+    prompt += f"{system_prompt_text}\n\n"  # Add the system prompt
+
     if history:
         for user_prompt, bot_response in history:
             prompt += f"[INST] {user_prompt} [/INST]"
             prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {info_md_content}\n\n{message} [/INST]"
+    prompt += f"[INST] {message} [/INST]"
     return prompt
 
 def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
-    # Prepend the system prompt to the user prompt
-    full_prompt = f"{system_prompt_text}, {prompt}"
-
     generate_kwargs = dict(
         temperature=temp,
         max_new_tokens=tokens,
@@ -35,7 +45,7 @@ def chat_inf(prompt, history, seed, temp, tokens, top_p, rep_p):
         seed=seed,
     )
 
-    formatted_prompt = format_prompt_mixtral(full_prompt, history, info_md_content)
+    formatted_prompt = format_prompt_mixtral(prompt, history, info_md_chunks)
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
     for response in stream:
@@ -76,8 +86,8 @@ with gr.Blocks(auth=("Admin", "0112358")) as app:  # Add auth here
     seed = gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, step=1, value=rand_val)
     tokens = gr.Slider(label="Max new tokens", value=3840, minimum=0, maximum=8000, step=64, interactive=True, visible=True, info="The maximum number of tokens")
     temp = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
-    top_p = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum 1.0, value=0.9)
-    rep_p = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum 2.0, value=1.0)
+    top_p = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+    rep_p = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)
 
     hid1 = gr.Number(value=1, visible=False)
93