Mattral committed
Commit 5b0fc3e
1 Parent(s): 581cd3b

Update app.py

Files changed (1): app.py +14 -23
app.py CHANGED

@@ -2,8 +2,6 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import random
 import textwrap
-from collections import Counter
-import re

 # Define the model to be used
 model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@@ -17,29 +15,16 @@ with open("info.md", "r") as file:
     info_md_content = file.read()

 # Chunk the info.md content into smaller sections
-chunk_size = 2000  # Adjust this size as needed
+chunk_size = 2500  # Adjust this size as needed
 info_md_chunks = textwrap.wrap(info_md_content, chunk_size)

-def get_relevant_chunks(query, chunks, top_k=2):
-    query_tokens = re.findall(r'\w+', query.lower())
-    chunk_scores = []
-
-    for chunk in chunks:
-        chunk_tokens = re.findall(r'\w+', chunk.lower())
-        chunk_counter = Counter(chunk_tokens)
-        score = sum(chunk_counter[token] for token in query_tokens)
-        chunk_scores.append((score, chunk))
-
-    # Sort chunks by score in descending order and return the top_k chunks
-    chunk_scores.sort(reverse=True, key=lambda x: x[0])
-    relevant_chunks = [chunk for score, chunk in chunk_scores[:top_k]]
-
-    return "\n\n".join(relevant_chunks)
+def get_all_chunks(chunks):
+    return "\n\n".join(chunks)

 def format_prompt_mixtral(message, history, info_md_chunks):
     prompt = "<s>"
-    relevant_chunks = get_relevant_chunks(message, info_md_chunks)
-    prompt += f"{relevant_chunks}\n\n"  # Add relevant chunks of info.md at the beginning
+    all_chunks = get_all_chunks(info_md_chunks)
+    prompt += f"{all_chunks}\n\n"  # Add all chunks of info.md at the beginning
     prompt += f"{system_prompt_text}\n\n"  # Add the system prompt

     if history:
@@ -79,14 +64,14 @@ def check_rand(inp, val):
     else:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))

-with gr.Blocks() as app:
-    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3> ask anything about PTT </h3><br><h7>EXPERIMENTAL</center>""")
+with gr.Blocks() as app:  # Add auth here
+    gr.HTML("""<center><h1 style='font-size:xx-large;'>PTT Chatbot</h1><br><h3>running on Huggingface Inference </h3><br><h7>EXPERIMENTAL</center>""")
     with gr.Row():
         chat = gr.Chatbot(height=500)
     with gr.Group():
         with gr.Row():
             with gr.Column(scale=3):
-                inp = gr.Textbox(label="Prompt", lines=5, interactive=True)
+                inp = gr.Textbox(label="Prompt", lines=5, interactive=True)  # Increased lines and interactive
         with gr.Row():
             with gr.Column(scale=2):
                 btn = gr.Button("Chat")
@@ -111,3 +96,9 @@ with gr.Blocks() as app:
     clear_btn.click(clear_fn, None, [inp, chat])

 app.queue(default_concurrency_limit=10).launch(share=True, auth=("admin", "0112358"))
+
+
+
+I have 2000 lines in my info.md file, and the model throws an error due to the character limit.
+Even though I split the content into chunks, I joined them all back together, which is a bad choice.
+What can I do?
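
One way out (a sketch, not a drop-in fix): bring back the keyword-overlap scoring this commit removed, and add an explicit character budget so the selected chunks can never overflow the context. The sketch below reuses identifiers from the file above (info_md_chunks, format_prompt_mixtral); MAX_CONTEXT_CHARS and char_budget are assumed knobs to tune, not values from any Mixtral or Hugging Face API.

import re
from collections import Counter

# Assumed budget for the knowledge section of the prompt. Mixtral-8x7B-Instruct
# has a 32k-token window, but tokens are not characters, so treat this as a
# conservative knob to tune, not a documented limit.
MAX_CONTEXT_CHARS = 8000

def get_relevant_chunks(query, chunks, top_k=3, char_budget=MAX_CONTEXT_CHARS):
    """Rank chunks by keyword overlap with the query, then keep only as many
    of the top-ranked ones as fit inside char_budget."""
    query_tokens = re.findall(r'\w+', query.lower())
    scored = []
    for chunk in chunks:
        counts = Counter(re.findall(r'\w+', chunk.lower()))
        scored.append((sum(counts[t] for t in query_tokens), chunk))
    scored.sort(reverse=True, key=lambda x: x[0])

    selected, used = [], 0
    for _, chunk in scored[:top_k]:
        if used + len(chunk) > char_budget:
            break  # stop before the prompt outgrows the model's window
        selected.append(chunk)
        used += len(chunk) + 2  # +2 accounts for the "\n\n" separator
    return "\n\n".join(selected)

Then, inside format_prompt_mixtral, swap get_all_chunks(info_md_chunks) for get_relevant_chunks(message, info_md_chunks) so each request carries only the chunks it needs. If keyword overlap retrieves the wrong chunks, the next step up would be embedding-based retrieval, but the budget check above applies either way.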