Mattral committed
Commit 5b0fc3e
1 Parent(s): 581cd3b

Update app.py

Files changed (1): app.py +14 -23
app.py CHANGED

@@ -2,8 +2,6 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import random
 import textwrap
-from collections import Counter
-import re

 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -17,29 +15,16 @@ with open("info.md", "r") as file:
     info_md_content = file.read()

 # Chunk the info.md content into smaller sections
-chunk_size = 2000  # Adjust this size as needed
+chunk_size = 2500  # Adjust this size as needed
 info_md_chunks = textwrap.wrap(info_md_content, chunk_size)

-def get_relevant_chunks(query, chunks, top_k=2):
-    query_tokens = re.findall(r'\w+', query.lower())
-    chunk_scores = []
-
-    for chunk in chunks:
-        chunk_tokens = re.findall(r'\w+', chunk.lower())
-        chunk_counter = Counter(chunk_tokens)
-        score = sum(chunk_counter[token] for token in query_tokens)
-        chunk_scores.append((score, chunk))
-
-    # Sort chunks by score in descending order and return the top_k chunks
-    chunk_scores.sort(reverse=True, key=lambda x: x[0])
-    relevant_chunks = [chunk for score, chunk in chunk_scores[:top_k]]
-
-    return "\n\n".join(relevant_chunks)
+def get_all_chunks(chunks):
+    return "\n\n".join(chunks)

 def format_prompt_mixtral(message, history, info_md_chunks):
     prompt = "<s>"
-    relevant_chunks = get_relevant_chunks(message, info_md_chunks)
-    prompt += f"{relevant_chunks}\n\n"  # Add relevant chunks of info.md at the beginning
+    all_chunks = get_all_chunks(info_md_chunks)
+    prompt += f"{all_chunks}\n\n"  # Add all chunks of info.md at the beginning
     prompt += f"{system_prompt_text}\n\n"  # Add the system prompt

     if history:
@@ -79,14 +64,14 @@ def check_rand(inp, val):
     else:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))

-with gr.Blocks() as app:
-    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3> ask anything about PTT </h3><br><h7>EXPERIMENTAL</center>""")
+with gr.Blocks() as app:  # Add auth here
+    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference </h3><br><h7>EXPERIMENTAL</center>""")
     with gr.Row():
         chat = gr.Chatbot(height=500)
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
-                inp = gr.Textbox(label="Prompt", lines=5, interactive=True)
+                inp = gr.Textbox(label="Prompt", lines=5, interactive=True)  # Increased lines and interactive
         with gr.Row():
             with gr.Column(scale=2):
                 btn = gr.Button("Chat")
@@ -111,3 +96,9 @@ with gr.Blocks() as app:
     clear_btn.click(clear_fn, None, [inp, chat])

 app.queue(default_concurrency_limit=10).launch(share=True, auth=("admin", "0112358"))
+
+
+
+I have 2000 lines in my info.md file, and the model throws an error due to the character limit.
+Even though I split the content into chunks, I joined them all back together, which is a bad choice.
+What can I do?
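
One way out (a sketch, not a drop-in fix): bring back the keyword-overlap scoring this commit removed, and add an explicit character budget so the selected chunks can never overflow the context. The sketch below reuses identifiers from the file above (info_md_chunks, format_prompt_mixtral); MAX_CONTEXT_CHARS and char_budget are assumed knobs to tune, not values from any Mixtral or Hugging Face API.

import re
from collections import Counter

# Assumed budget for the knowledge section of the prompt. Mixtral-8x7B-Instruct
# has a 32k-token window, but tokens are not characters, so treat this as a
# conservative knob to tune, not a documented limit.
MAX_CONTEXT_CHARS = 8000

def get_relevant_chunks(query, chunks, top_k=3, char_budget=MAX_CONTEXT_CHARS):
    """Rank chunks by keyword overlap with the query, then keep only as many
    of the top-ranked ones as fit inside char_budget."""
    query_tokens = re.findall(r'\w+', query.lower())
    scored = []
    for chunk in chunks:
        counts = Counter(re.findall(r'\w+', chunk.lower()))
        scored.append((sum(counts[t] for t in query_tokens), chunk))
    scored.sort(reverse=True, key=lambda x: x[0])

    selected, used = [], 0
    for _, chunk in scored[:top_k]:
        if used + len(chunk) > char_budget:
            break  # stop before the prompt outgrows the model's window
        selected.append(chunk)
        used += len(chunk) + 2  # +2 accounts for the "\n\n" separator
    return "\n\n".join(selected)

Then, inside format_prompt_mixtral, swap get_all_chunks(info_md_chunks) for get_relevant_chunks(message, info_md_chunks) so each request carries only the chunks it needs. If keyword overlap retrieves the wrong chunks, the next step up would be embedding-based retrieval, but the budget check above applies either way.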