LawOEChat

Sleeping

App Files Files Community

Mattral committed on May 17

Commit

581cd3b

•

1 Parent(s): e1d6a1b

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -8

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import random
 import textwrap
 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -15,16 +17,29 @@ with open("info.md", "r") as file:
     info_md_content = file.read()
 # Chunk the info.md content into smaller sections
-chunk_size = 2500  # Adjust this size as needed
 info_md_chunks = textwrap.wrap(info_md_content, chunk_size)
-def get_all_chunks(chunks):
-    return "\n\n".join(chunks)
 def format_prompt_mixtral(message, history, info_md_chunks):
     prompt = "<s>"
-    all_chunks = get_all_chunks(info_md_chunks)
-    prompt += f"{all_chunks}\n\n"  # Add all chunks of info.md at the beginning
     prompt += f"{system_prompt_text}\n\n"  # Add the system prompt
     if history:
@@ -64,14 +79,14 @@ def check_rand(inp, val):
     else:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))
-with gr.Blocks() as app:  # Add auth here
-    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference </h3><br><h7>EXPERIMENTAL</center>""")
     with gr.Row():
         chat = gr.Chatbot(height=500)
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
-                inp = gr.Textbox(label="Prompt", lines=5, interactive=True)  # Increased lines and interactive
                 with gr.Row():
                     with gr.Column(scale=2):
                         btn = gr.Button("Chat")

 from huggingface_hub import InferenceClient
 import random
 import textwrap
+from collections import Counter
+import re
 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
     info_md_content = file.read()
 # Chunk the info.md content into smaller sections
+chunk_size = 2000  # Adjust this size as needed
 info_md_chunks = textwrap.wrap(info_md_content, chunk_size)
+def get_relevant_chunks(query, chunks, top_k=2):  # Rank chunks by how often the query's words occur in them; return the top_k best joined into one string
+    query_tokens = re.findall(r'\w+', query.lower())  # Lowercased word tokens of the query; repeated words are kept, so they count more than once
+    chunk_scores = []  # (score, chunk) pairs, appended in the original chunk order
+    for chunk in chunks:
+        chunk_tokens = re.findall(r'\w+', chunk.lower())  # Same tokenization as applied to the query
+        chunk_counter = Counter(chunk_tokens)  # Word -> number of occurrences in this chunk; a missing word reads as 0
+        score = sum(chunk_counter[token] for token in query_tokens)  # Total occurrences of the query's words in this chunk
+        chunk_scores.append((score, chunk))
+    # Sort chunks by score in descending order and return the top_k chunks
+    chunk_scores.sort(reverse=True, key=lambda x: x[0])  # Keyed on score only; the sort is stable, so equal scores keep their original relative order
+    relevant_chunks = [chunk for score, chunk in chunk_scores[:top_k]]  # No score threshold: chunks that scored 0 can still be selected
+    return "\n\n".join(relevant_chunks)  # Selected chunks are separated by a blank line
 def format_prompt_mixtral(message, history, info_md_chunks):
     prompt = "<s>"
+    relevant_chunks = get_relevant_chunks(message, info_md_chunks)
+    prompt += f"{relevant_chunks}\n\n"  # Add relevant chunks of info.md at the beginning
     prompt += f"{system_prompt_text}\n\n"  # Add the system prompt
     if history:
     else:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))
+with gr.Blocks() as app:
+    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3> ask anything about PTT </h3><br><h7>EXPERIMENTAL</center>""")
     with gr.Row():
         chat = gr.Chatbot(height=500)
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
+                inp = gr.Textbox(label="Prompt", lines=5, interactive=True)
                 with gr.Row():
                     with gr.Column(scale=2):
                         btn = gr.Button("Chat")