Shreyas094 committed
Commit 8b5e7fa · verified · 1 Parent(s): f3cc462

Update app.py

Files changed (1): app.py (+112 -107)
app.py CHANGED
@@ -1,27 +1,18 @@
 import os
 import logging
-import json
-import time
+import asyncio
 import gradio as gr
 from huggingface_hub import InferenceClient
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.schema import Document
 from duckduckgo_search import DDGS
-from dotenv import load_dotenv
-from functools import lru_cache
-from tenacity import retry, stop_after_attempt, wait_fixed
-
-# Load environment variables
-load_dotenv()
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
 
 # Environment variables and configurations
-HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
-logger.info(f"Using Hugging Face token: {HUGGINGFACE_TOKEN[:4]}...{HUGGINGFACE_TOKEN[-4:] if HUGGINGFACE_TOKEN else 'Not Set'}")
+huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
 MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
@@ -33,8 +24,7 @@ MODELS = [
     "google/gemma-2-27b-it"
 ]
 
-FALLBACK_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
-
+# Default system message template
 DEFAULT_SYSTEM_PROMPT = """You are a world-class financial AI assistant, capable of complex reasoning and reflection.
 Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags.
 Providing comprehensive and accurate information based on web search results is essential.
@@ -42,47 +32,36 @@ Your goal is to synthesize the given context into a coherent and detailed respon
 Please ensure that your response is well-structured and factual.
 If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""
 
-class WebSearcher:
-    def __init__(self):
-        self.ddgs = DDGS()
-
-    @lru_cache(maxsize=100)
-    def search(self, query, max_results=5):
-        try:
-            results = list(self.ddgs.text(query, max_results=max_results))
-            logger.info(f"Search completed for query: {query}")
-            return results
-        except Exception as e:
-            logger.error(f"Error during DuckDuckGo search: {str(e)}")
-            return []
-
-@lru_cache(maxsize=1)
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
+def duckduckgo_search(query):
+    try:
+        with DDGS() as ddgs:
+            results = ddgs.text(query, max_results=5)
+        logging.info(f"Search completed for query: {query}")
+        return results
+    except Exception as e:
+        logging.error(f"Error during DuckDuckGo search: {str(e)}")
+        return []
+
 def create_web_search_vectors(search_results):
     embed = get_embeddings()
-    documents = [
-        Document(
-            page_content=f"{result['title']}\n{result['body']}\nSource: {result['href']}",
-            metadata={"source": result['href']}
-        )
-        for result in search_results if 'body' in result
-    ]
-    logger.info(f"Created vectors for {len(documents)} search results.")
+    documents = []
+    for result in search_results:
+        if 'body' in result:
+            content = f"{result['title']}\n{result['body']}\nSource: {result['href']}"
+            documents.append(Document(page_content=content, metadata={"source": result['href']}))
+    logging.info(f"Created vectors for {len(documents)} search results.")
     return FAISS.from_documents(documents, embed)
 
-@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
-def make_api_call(client, api_params):
-    return client.chat_completion(**api_params)
-
-def get_response_with_search(query, system_prompt, model, use_embeddings, history, num_calls=3, temperature=0.2):
-    searcher = WebSearcher()
-    search_results = searcher.search(query)
+async def get_response_with_search(query, system_prompt, model, use_embeddings, history=None, num_calls=3, temperature=0.2):
+    search_results = duckduckgo_search(query)
 
     if not search_results:
-        logger.warning(f"No web search results found for query: {query}")
-        return "No web search results available. Please try again.", ""
+        logging.warning(f"No web search results found for query: {query}")
+        yield "No web search results available. Please try again.", ""
+        return
 
     sources = [result['href'] for result in search_results if 'href' in result]
     source_list_str = "\n".join(sources)
@@ -95,80 +74,105 @@ def get_response_with_search(query, system_prompt, model, use_embeddings, histor
     else:
         context = "\n".join([f"{result['title']}\n{result['body']}" for result in search_results])
 
-    logger.info(f"Context created for query: {query}")
+    logging.info(f"Context created for query: {query}")
 
-    chat_history = "\n".join([f"Human: {h[0]}\nAI: {h[1]}" for h in history])
-    user_message = f"""Chat history:
-    {chat_history}
-
-    Using the following context from web search results:
+    user_message = f"""Using the following context from web search results:
    {context}
 
    Write a detailed and complete research document that fulfills the following user request: '{query}'."""
 
-    client = InferenceClient(model, token=HUGGINGFACE_TOKEN)
+    client = InferenceClient(model, token=huggingface_token)
     full_response = ""
+
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_message}
+    ]
+
+    # Include chat history if provided
+    if history:
+        messages = history + messages
+
     try:
-        for _ in range(num_calls):
-            api_params = {
-                "messages": [
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_message}
-                ],
-                "max_tokens": 3000,
-                "temperature": temperature,
-                "top_p": 0.8,
-            }
-            logger.info(f"Sending request to API with params: {json.dumps(api_params, indent=2, default=str)}")
-            response = make_api_call(client, api_params)
-            logger.info(f"Raw response from model: {response}")
-
-            if isinstance(response, dict):
-                if 'generated_text' in response:
-                    full_response += response['generated_text']
-                elif 'choices' in response and len(response['choices']) > 0:
-                    if isinstance(response['choices'][0], dict) and 'message' in response['choices'][0]:
-                        full_response += response['choices'][0]['message'].get('content', '')
-                    elif isinstance(response['choices'][0], str):
-                        full_response += response['choices'][0]
-            elif hasattr(response, 'generated_text'):
-                full_response += response.generated_text
-            elif hasattr(response, 'content'):
-                full_response += response.content
-            else:
-                logger.error(f"Unexpected response format from the model: {type(response)}")
-                return "Unexpected response format from the model. Please try again.", ""
+        for call in range(num_calls):
+            try:
+                for response in client.chat_completion(
+                    messages=messages,
+                    max_tokens=6000,
+                    temperature=temperature,
+                    stream=True,
+                    top_p=0.8,
+                ):
+                    if isinstance(response, dict) and "choices" in response:
+                        for choice in response["choices"]:
+                            if "delta" in choice and "content" in choice["delta"]:
+                                chunk = choice["delta"]["content"]
+                                full_response += chunk
+                                yield full_response, ""
+                    else:
+                        logging.error("Unexpected response format or missing attributes in the response object.")
+                        break
+            except Exception as e:
+                logging.error(f"Error in API call {call + 1}: {str(e)}")
+                if "422 Client Error" in str(e):
+                    logging.warning("Received 422 Client Error. Adjusting request parameters.")
+                    # You might want to adjust parameters here, e.g., reduce max_tokens
+                yield f"An error occurred during API call {call + 1}. Retrying...", ""
 
-            time.sleep(1)  # Add a 1-second delay between API calls
-    except Exception as e:
-        logger.error(f"Error in get_response_with_search: {str(e)}")
-        logger.info(f"Attempting fallback to {FALLBACK_MODEL}")
-        client = InferenceClient(FALLBACK_MODEL, token=HUGGINGFACE_TOKEN)
-        # Retry with fallback model (you can implement retry logic here)
-        return f"An error occurred while processing your request: {str(e)}", ""
+            # Add a small delay between API calls
+            await asyncio.sleep(1)  # 1 second delay
+
+    except asyncio.CancelledError:
+        logging.warning("The operation was cancelled.")
+        yield "The operation was cancelled. Please try again.", ""
 
     if not full_response:
-        logger.warning("No response generated from the model")
-        return "No response generated from the model.", ""
-    else:
-        return f"{full_response}\n\nSources:\n{source_list_str}", ""
-
-def respond(message, system_prompt, history, model, temperature, num_calls, use_embeddings):
-    logger.info(f"Respond function called with message: {message}")
-    logger.info(f"User Query: {message}")
-    logger.info(f"Model Used: {model}")
-    logger.info(f"Temperature: {temperature}")
-    logger.info(f"Number of API Calls: {num_calls}")
-    logger.info(f"Use Embeddings: {use_embeddings}")
-    logger.info(f"System Prompt: {system_prompt}")
-    logger.info(f"History: {history}")
+        logging.warning("No response generated from the model")
+        yield "No response generated from the model.", ""
+
+    yield f"{full_response}\n\nSources:\n{source_list_str}", ""
+
+async def respond(message, system_prompt, history, model, temperature, num_calls, use_embeddings):
+    logging.info(f"User Query: {message}")
+    logging.info(f"Model Used: {model}")
+    logging.info(f"Temperature: {temperature}")
+    logging.info(f"Number of API Calls: {num_calls}")
+    logging.info(f"Use Embeddings: {use_embeddings}")
+    logging.info(f"System Prompt: {system_prompt}")
+
+    # Convert gradio history to the format expected by get_response_with_search
+    chat_history = []
+    for human, assistant in history:
+        chat_history.append({"role": "user", "content": human})
+        if assistant:
+            chat_history.append({"role": "assistant", "content": assistant})
 
     try:
-        main_content, sources = get_response_with_search(message, system_prompt, model, use_embeddings, history, num_calls=num_calls, temperature=temperature)
-        return main_content
+        full_response = ""
+        async for main_content, sources in get_response_with_search(
+            message,
+            system_prompt,
+            model,
+            use_embeddings,
+            history=chat_history,
+            num_calls=num_calls,
+            temperature=temperature
+        ):
+            # Yield only the new content
+            new_content = main_content[len(full_response):]
+            full_response = main_content
+            yield new_content
+
+        # Yield the sources as a separate message
+        if sources:
+            yield f"\n\nSources:\n{sources}"
+
+    except asyncio.CancelledError:
+        logging.warning("The operation was cancelled.")
+        yield "The operation was cancelled. Please try again."
     except Exception as e:
-        logger.error(f"Error in respond function: {str(e)}")
-        return f"An error occurred: {str(e)}"
+        logging.error(f"Error in respond function: {str(e)}")
+        yield f"An error occurred: {str(e)}"
 
 css = """
 /* Fine-tune chatbox size */
@@ -182,6 +186,7 @@ css = """
 }
 """
 
+# Gradio interface setup
 def create_gradio_interface():
     custom_placeholder = "Enter your question here for web search."
 
@@ -232,4 +237,4 @@ def create_gradio_interface():
 
 if __name__ == "__main__":
     demo = create_gradio_interface()
-    demo.launch(share=True)
+    demo.launch(share=True)
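For reference, the new duckduckgo_search() helper replaces the cached WebSearcher class removed above and can be exercised on its own. A minimal sketch, assuming network access and the 'title'/'href'/'body' keys that DDGS.text results carry; the query string is illustrative:

    # Hypothetical smoke test for the helper introduced in this commit.
    results = duckduckgo_search("US CPI release")
    for result in results:
        print(result['title'], '-', result['href'])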
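The use_embeddings branch is elided from the hunks above, but the FAISS store built by create_web_search_vectors is presumably queried with LangChain's standard similarity_search before the context string is assembled. A sketch under that assumption; the variable names, query, and k=5 are illustrative and not taken from the elided lines:

    # Hypothetical reconstruction of the embeddings path (not part of the commit).
    search_results = duckduckgo_search("ECB rate decision")
    store = create_web_search_vectors(search_results)
    relevant_docs = store.similarity_search("ECB rate decision", k=5)
    context = "\n".join(doc.page_content for doc in relevant_docs)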
 
+ yield f"An error occurred: {str(e)}"
176
 
177
  css = """
178
  /* Fine-tune chatbox size */
 
186
  }
187
  """
188
 
189
+ # Gradio interface setup
190
  def create_gradio_interface():
191
  custom_placeholder = "Enter your question here for web search."
192
 
 
237
 
238
  if __name__ == "__main__":
239
  demo = create_gradio_interface()
240
+ demo.launch(share=True)