Spaces:

thenativefox
/

RAG

Sleeping

App Files Files Community

thenativefox committed on Jun 24

Commit

93c49cb

•

1 Parent(s): 1c860fb

fix async issues

Browse files

Files changed (4) hide show

app.py +49 -25
backend/query_llm.py +22 -2
backend/semantic_search.py +0 -1
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -12,8 +12,11 @@ from jinja2 import Environment, FileSystemLoader
 from backend.query_llm import generate_hf, generate_openai
 from backend.semantic_search import retrieve
 TOP_K = int(os.getenv("TOP_K", 4))
 proj_dir = Path(__file__).parent
 # Setting up the logging
@@ -29,13 +32,17 @@ template_html = env.get_template('template_html.j2')
 def add_text(history, text):
     history = [] if history is None else history
     history = history + [(text, None)]
     return history, gr.Textbox(value="", interactive=False)
 def bot(history, api_kind):
     query = history[-1][0]
     if not query:
         raise gr.Warning("Please submit a non-empty string as a prompt")
@@ -52,13 +59,14 @@ def bot(history, api_kind):
     # Create Prompt
     prompt = template.render(documents=documents, query=query)
     prompt_html = template_html.render(documents=documents, query=query)
     if api_kind == "HuggingFace":
-         generate_fn = generate_hf
     elif api_kind == "OpenAI":
-         generate_fn = generate_openai
     else:
-         raise gr.Error(f"API {api_kind} is not supported")
     history[-1][1] = ""
     for character in generate_fn(prompt, history[:-1]):
@@ -68,40 +76,56 @@ def bot(history, api_kind):
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot(
-            [],
-            elem_id="chatbot",
-            avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
-                           'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
-            bubble_full_width=False,
-            show_copy_button=True,
-            show_share_button=True,
-            )
     with gr.Row():
         txt = gr.Textbox(
-                scale=3,
-                show_label=False,
-                placeholder="Enter text and press enter",
-                container=False,
-                )
         txt_btn = gr.Button(value="Submit text", scale=1)
     api_kind = gr.Radio(choices=["HuggingFace", "OpenAI"], value="HuggingFace")
     prompt_html = gr.HTML()
-    # Turn off interactivity while generating if you click
-    txt_msg = txt_btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
-            bot, [chatbot, api_kind], [chatbot, prompt_html])
-    # Turn it back on
     txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
     # Turn off interactivity while generating if you hit enter
-    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
-            bot, [chatbot, api_kind], [chatbot, prompt_html])
-    # Turn it back on
     txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
 demo.queue()
-demo.launch(debug=True)

 from backend.query_llm import generate_hf, generate_openai
 from backend.semantic_search import retrieve
+from dotenv import load_dotenv
+load_dotenv()
 TOP_K = int(os.getenv("TOP_K", 4))
+HF_TOKEN = os.getenv("HF_TOKEN")
 proj_dir = Path(__file__).parent
 # Setting up the logging
 def add_text(history, text):
+    logger.info(f'Adding text: {text}')
     history = [] if history is None else history
     history = history + [(text, None)]
+    logger.info(f'Updated history: {history}')
     return history, gr.Textbox(value="", interactive=False)
 def bot(history, api_kind):
+    logger.info(f'Bot function called with history: {history} and api_kind: {api_kind}')
     query = history[-1][0]
+    logger.info(f'Query: {query}')
     if not query:
         raise gr.Warning("Please submit a non-empty string as a prompt")
     # Create Prompt
     prompt = template.render(documents=documents, query=query)
     prompt_html = template_html.render(documents=documents, query=query)
+    logger.info(f'Prompt created: {prompt}')
     if api_kind == "HuggingFace":
+        generate_fn = generate_hf
     elif api_kind == "OpenAI":
+        generate_fn = generate_openai
     else:
+        raise gr.Error(f"API {api_kind} is not supported")
     history[-1][1] = ""
     for character in generate_fn(prompt, history[:-1]):
 with gr.Blocks() as demo:
     chatbot = gr.Chatbot(
+        [],
+        elem_id="chatbot",
+        avatar_images=('https://aui.atlassian.com/aui/8.8/docs/images/avatar-person.svg',
+                       'https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg'),
+        bubble_full_width=False,
+        show_copy_button=True,
+        show_share_button=True,
+    )
     with gr.Row():
         txt = gr.Textbox(
+            scale=3,
+            show_label=False,
+            placeholder="Enter text and press enter",
+            container=False,
+        )
         txt_btn = gr.Button(value="Submit text", scale=1)
     api_kind = gr.Radio(choices=["HuggingFace", "OpenAI"], value="HuggingFace")
     prompt_html = gr.HTML()
+    # Turn off interactivity while generating if you click
+    txt_msg = txt_btn.click(
+        fn=add_text,
+        inputs=[chatbot, txt],
+        outputs=[chatbot, txt],
+        queue=False
+    ).then(
+        fn=bot,
+        inputs=[chatbot, api_kind],
+        outputs=[chatbot, prompt_html],
+        queue=False
+    )
     txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
     # Turn off interactivity while generating if you hit enter
+    txt_msg = txt.submit(
+        fn=add_text,
+        inputs=[chatbot, txt],
+        outputs=[chatbot, txt],
+        queue=False
+    ).then(
+        fn=bot,
+        inputs=[chatbot, api_kind],
+        outputs=[chatbot, prompt_html],
+        queue=False
+    )
     txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
 demo.queue()
+logger.info('Launching Gradio app...')
+demo.launch(debug=True)

backend/query_llm.py CHANGED Viewed

@@ -1,23 +1,27 @@
 import openai
 import gradio as gr
 import os
 from typing import Any, Dict, Generator, List
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
 OPENAI_KEY = os.getenv("OPENAI_API_KEY")
 HF_TOKEN = os.getenv("HF_TOKEN")
 HF_MODEL = os.getenv("HF_MODEL")
 OPENAI_MODEL = os.getenv("OPENAI_MODEL")
 HF_CLIENT = InferenceClient(
     os.getenv("HF_MODEL"),
     token=HF_TOKEN
 )
-OAI_CLIENT = openai.Client(api_key=OPENAI_KEY)
 TOKENIZER = AutoTokenizer.from_pretrained(HF_MODEL)
 HF_GENERATE_KWARGS = {
@@ -81,10 +85,17 @@ def generate_hf(prompt: str, history: str) -> Generator[str, None, str]:
             details=True,
             return_full_text=False
         )
         output = ""
         for response in stream:
             output += response.token.text
             yield output
     except Exception as e:
         if "Too Many Requests" in str(e):
@@ -109,6 +120,14 @@ def generate_openai(prompt: str, history: str) -> Generator[str, None, str]:
     formatted_prompt = format_prompt(prompt, "openai")
     try:
         stream = OAI_CLIENT.chat.completions.create(
             model=os.getenv("OPENAI_MODEL"),
             messages=formatted_prompt,
@@ -122,9 +141,10 @@ def generate_openai(prompt: str, history: str) -> Generator[str, None, str]:
                 yield output
     except Exception as e:
         if "Too Many Requests" in str(e):
             raise gr.Error("ERROR: Too many requests on OpenAI client")
         elif "You didn't provide an API key" in str(e):
             raise gr.Error("Authentication error: OpenAI key was either not provided or incorrect")
         else:
-            raise gr.Error(f"Unhandled Exception: {str(e)}")

 import openai
 import gradio as gr
 import os
+import logging
 from typing import Any, Dict, Generator, List
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
+from dotenv import load_dotenv
+load_dotenv()
 OPENAI_KEY = os.getenv("OPENAI_API_KEY")
 HF_TOKEN = os.getenv("HF_TOKEN")
 HF_MODEL = os.getenv("HF_MODEL")
 OPENAI_MODEL = os.getenv("OPENAI_MODEL")
+OAI_CLIENT = openai.Client(api_key=OPENAI_KEY)
 HF_CLIENT = InferenceClient(
     os.getenv("HF_MODEL"),
     token=HF_TOKEN
 )
+openai.api_key = OPENAI_KEY
 TOKENIZER = AutoTokenizer.from_pretrained(HF_MODEL)
 HF_GENERATE_KWARGS = {
             details=True,
             return_full_text=False
         )
         output = ""
+        final_output = []
         for response in stream:
             output += response.token.text
+            final_output.append(response.token.text)
+            logging.info(f"Current output: {output}")
             yield output
+        # Log the final output
+        logging.info(f"Final output: {''.join(final_output)}")
     except Exception as e:
         if "Too Many Requests" in str(e):
     formatted_prompt = format_prompt(prompt, "openai")
     try:
+        # response = OAI_CLIENT.chat.completions.create(
+        #     model=os.getenv("OPENAI_MODEL"),
+        #     messages=formatted_prompt,
+        #     **OAI_GENERATE_KWARGS
+        # )
+        # logging.info("SIMPLE OUTPUT")
+        # logging.info(response.choices[0].message.content)
         stream = OAI_CLIENT.chat.completions.create(
             model=os.getenv("OPENAI_MODEL"),
             messages=formatted_prompt,
                 yield output
     except Exception as e:
+        logging.error(f"Exception during OpenAI generation: {str(e)}")
         if "Too Many Requests" in str(e):
             raise gr.Error("ERROR: Too many requests on OpenAI client")
         elif "You didn't provide an API key" in str(e):
             raise gr.Error("Authentication error: OpenAI key was either not provided or incorrect")
         else:
+            raise gr.Error(f"Unhandled Exception: {str(e)}")

backend/semantic_search.py CHANGED Viewed

@@ -50,7 +50,6 @@ retriever = SentenceTransformer(os.getenv("EMB_MODEL"))
 def get_table_name():
     emb_model = os.getenv("EMB_MODEL")
-    print(emb_model)
     if emb_model == "sentence-transformers/all-MiniLM-L6-v2":
         return MODEL1_STRATEGY1
     elif emb_model == "BAAI/bge-large-en-v1.5":

 def get_table_name():
     emb_model = os.getenv("EMB_MODEL")
     if emb_model == "sentence-transformers/all-MiniLM-L6-v2":
         return MODEL1_STRATEGY1
     elif emb_model == "BAAI/bge-large-en-v1.5":

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 lancedb==0.8.2
-openai==1.31.1
 langchain==0.2.5
 tiktoken
 sentence-transformers==3.0.0

 lancedb==0.8.2
+openai==1.35.3
 langchain==0.2.5
 tiktoken
 sentence-transformers==3.0.0