Shreyas094 committed on
Commit bd71ef9 · verified
1 Parent(s): 5857237

Update app.py

Files changed (1)
  1. app.py +63 -12
app.py CHANGED
@@ -6,6 +6,10 @@ from duckduckgo_search import DDGS
 from typing import List, Dict
 from pydantic import BaseModel
 from huggingface_hub import InferenceClient
+from typing import List, Dict
+from sentence_transformers import SentenceTransformer
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
 
 # Set up basic configuration for logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -36,23 +40,70 @@ class ConversationManager:
 
 conversation_manager = ConversationManager()
 
-def duckduckgo_search(query):
+huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
+
+def duckduckgo_search(query: str, max_results: int = 10) -> List[Dict[str, str]]:
     with DDGS() as ddgs:
-        results = ddgs.text(query, max_results=5)
-        return list(results)
+        results = list(ddgs.text(query, max_results=max_results))
+        return results
 
-def get_web_search_results(query: str, max_results: int = 10, use_ddgs_text: bool = False) -> List[Dict[str, str]]:
+def get_web_search_results(query: str, model: str, num_calls: int = 3, temperature: float = 0.2, max_results: int = 10) -> Dict[str, any]:
     try:
-        if use_ddgs_text:
-            results = duckduckgo_search(query)
+        # Perform web search
+        search_results = duckduckgo_search(query, max_results)
+
+        if not search_results:
+            return {"error": f"No results found for query: {query}"}
+
+        # Create embeddings for search results
+        embedder = SentenceTransformer('all-MiniLM-L6-v2')
+        web_search_vectors = embedder.encode([result['body'] for result in search_results])
+
+        # Retrieve relevant documents
+        query_vector = embedder.encode([query])
+        similarities = cosine_similarity(query_vector, web_search_vectors)[0]
+        top_indices = np.argsort(similarities)[-5:][::-1]
+        relevant_docs = [search_results[i] for i in top_indices]
+
+        # Prepare context
+        context = "\n".join([f"Title: {doc['title']}\nContent: {doc['body']}" for doc in relevant_docs])
+
+        # Prepare prompt
+        prompt = f"""Using the following context from web search results:
+
+        {context}
+
+        Write a detailed and complete research document that fulfills the following user request: '{query}'
+        After writing the document, please provide a list of sources used in your response."""
+
+        # Generate response based on the selected model
+        if model == "@cf/meta/llama-3.1-8b-instruct":
+            # Use Cloudflare API (placeholder, as the actual implementation is not provided)
+            response = get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web")
         else:
-            results = list(DDGS().text(query, max_results=max_results))
-        if not results:
-            print(f"No results found for query: {query}")
-        return results
+            # Use Hugging Face API
+            client = InferenceClient(model, token=huggingface_token)
+            response = ""
+            for _ in range(num_calls):
+                for message in client.chat_completion(
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=10000,
+                    temperature=temperature,
+                    stream=True,
+                ):
+                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                        response += message.choices[0].delta.content
+
+        return {
+            "query": query,
+            "search_results": search_results,
+            "relevant_docs": relevant_docs,
+            "response": response
+        }
+
     except Exception as e:
-        print(f"An error occurred during web search: {str(e)}")
-        return [{"error": f"An error occurred during web search: {str(e)}"}]
+        return {"error": f"An error occurred during web search and processing: {str(e)}"}
+
 
 def rephrase_query(original_query: str, conversation_manager: ConversationManager) -> str:
     context = conversation_manager.get_context()
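
Note that the Cloudflare branch calls get_response_from_cloudflare, which this diff does not define (the in-code comment itself calls it a placeholder). Below is a minimal, non-streaming sketch of what such a helper could look like; the Workers AI REST endpoint usage, the CLOUDFLARE_ACCOUNT_ID / CLOUDFLARE_AUTH_TOKEN environment variables, and the requests dependency are assumptions for illustration, not part of the commit.

import os
import requests

def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="web"):
    # Assumed helper, not from the commit: posts the context and query to the
    # Cloudflare Workers AI REST endpoint and concatenates the replies.
    account_id = os.environ.get("CLOUDFLARE_ACCOUNT_ID")   # assumed environment variable
    auth_token = os.environ.get("CLOUDFLARE_AUTH_TOKEN")   # assumed environment variable
    url = f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/@cf/meta/llama-3.1-8b-instruct"
    user_prompt = f"Using the following context:\n{context}\n\nFulfill this {search_type} search request: '{query}'"
    response = ""
    for _ in range(num_calls):
        r = requests.post(
            url,
            headers={"Authorization": f"Bearer {auth_token}"},
            json={
                "messages": [{"role": "user", "content": user_prompt}],
                "temperature": temperature,
            },
        )
        r.raise_for_status()
        # Workers AI wraps the generated text as {"result": {"response": "..."}}
        response += r.json()["result"]["response"]
    return response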
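
For orientation, here is a hypothetical call into the updated get_web_search_results. The query and model id are made-up examples (any id other than the Cloudflare one takes the InferenceClient path), and HUGGINGFACE_TOKEN must already be set in the environment because the module reads it at import time.

result = get_web_search_results(
    query="recent progress in retrieval-augmented generation",
    model="mistralai/Mistral-7B-Instruct-v0.3",  # assumed Hugging Face model id
    num_calls=1,
    temperature=0.2,
    max_results=10,
)

if "error" in result:
    print(result["error"])
else:
    for doc in result["relevant_docs"]:   # the five results closest to the query by cosine similarity
        print(doc["title"])
    print(result["response"])             # generated research document plus the model's source list

Compared with the previous version, which returned the raw DuckDuckGo results, the function now ranks results with sentence-transformer embeddings, keeps the top five, and asks the selected model to write a research document grounded in that context.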