Spaces:

Ritesh-hf
/

web-agent

Running

App Files Files Community

Ritesh-hf committed on Nov 20

Commit

1c806b3

•

1 Parent(s): 38f2a07

update index

Browse files

Files changed (7) hide show

.gitignore +3 -0
Dockerfile +1 -1
UAE-NLA.json +0 -0
app.py +107 -93
requirements.txt +6 -7
templates/chat.html +2 -16
updated-traveler.json +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.env
+*.ipynb
+__pycache__/*

Dockerfile CHANGED Viewed

@@ -13,4 +13,4 @@ COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --chown=user . /app
-CMD ["gunicorn", "-b", "0.0.0.0:7860", "-k", "geventwebsocket.gunicorn.workers.GeventWebSocketWorker", "-w", "1", "app:app"]

 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

UAE-NLA.json DELETED Viewed

The diff for this file is too large to render. See raw diff

app.py CHANGED Viewed

@@ -1,15 +1,13 @@
-from gevent import monkey
-monkey.patch_all()
 import nltk
 nltk.download('punkt_tab')
 import os
 from dotenv import load_dotenv
 import asyncio
-from flask import Flask, request, render_template
-from flask_cors import CORS
-from flask_socketio import SocketIO, emit, join_room, leave_room
 from langchain.chains import create_history_aware_retriever, create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_community.chat_message_histories import ChatMessageHistory
@@ -20,10 +18,12 @@ from pinecone import Pinecone
 from pinecone_text.sparse import BM25Encoder
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.retrievers import PineconeHybridSearchRetriever
-from langchain_groq import ChatGroq
 from langchain.retrievers import ContextualCompressionRetriever
-from langchain.retrievers.document_compressors import FlashrankRerank
 from langchain_community.chat_models import ChatPerplexity
 # Load environment variables
 load_dotenv(".env")
@@ -38,14 +38,19 @@ os.environ['USER_AGENT'] = USER_AGENT
 os.environ["GROQ_API_KEY"] = GROQ_API_KEY
 os.environ["TOKENIZERS_PARALLELISM"] = 'true'
-# Initialize Flask app and SocketIO with CORS
-app = Flask(__name__)
-CORS(app)
-socketio = SocketIO(app, async_mode='gevent', cors_allowed_origins="*")
-app.config['SESSION_COOKIE_SECURE'] = True  # Use HTTPS
-app.config['SESSION_COOKIE_HTTPONLY'] = True
-app.config['SESSION_COOKIE_SAMESITE'] = 'Lax'
-app.config['SECRET_KEY'] = SECRET_KEY
 # Function to initialize Pinecone connection
 def initialize_pinecone(index_name: str):
@@ -56,15 +61,13 @@ def initialize_pinecone(index_name: str):
         print(f"Error initializing Pinecone: {e}")
         raise
 ##################################################
 ##          Change down here
 ##################################################
 # Initialize Pinecone index and BM25 encoder
-pinecone_index = initialize_pinecone("traveler-demo-website-vectorstore")
-bm25 = BM25Encoder().load("./bm25_traveler_website.json")
 ##################################################
 ##################################################
@@ -80,11 +83,12 @@ retriever = PineconeHybridSearchRetriever(
 )
 # Initialize LLM
-# llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, max_tokens=1024, max_retries=2)
-llm = ChatPerplexity(temperature=0, pplx_api_key=GROQ_API_KEY, model="llama-3.1-70b-instruct", max_tokens=1024, max_retries=2)
 # Initialize Reranker
-compressor = FlashrankRerank()
 compression_retriever = ContextualCompressionRetriever(
     base_compressor=compressor, base_retriever=retriever
 )
@@ -105,33 +109,31 @@ contextualize_q_prompt = ChatPromptTemplate.from_messages(
 history_aware_retriever = create_history_aware_retriever(llm, compression_retriever, contextualize_q_prompt)
 # QA system prompt and chain
-qa_system_prompt = """ You are a highly skilled information retrieval assistant. Use the following context to answer questions effectively. \
-If you don't know the answer, simply state that you don't know. \
-Your answer should be in {language} language. \
-Provide answers in proper HTML format and keep them concise. \
-When responding to queries, follow these guidelines: \
-    1. Provide Clear Answers: \
-        - Ensure the response directly addresses the query with accurate and relevant information.\
-        - Only use information from the provided context. Using outside information may result in the termination of the chat.
-    2. Include Detailed References: \
-        - Links to Sources: Include URLs to credible sources where users can verify information or explore further. \
-        - Reference Sites: Mention specific websites or platforms that offer additional information. \
-        - Downloadable Materials: Provide links to any relevant downloadable resources if applicable. \
-    3. Formatting for Readability: \
-        - The answer should be in a proper HTML format with appropriate tags. \
-        - For Arabic language response align the text to the right and convert numbers.
-        - Double-check if the language of the answer is correct or not.
-        - Use bullet points or numbered lists where applicable to present information. \
-        - Highlight key details using bold or italics. \
-        - Provide proper and meaningful abbreviations for URLs. Do not include naked URLs. \
-    4. Organize Content Logically: \
-        - Structure the content logically, ensuring easy navigation and understanding for the user. \
 {context}
 """
 qa_prompt = ChatPromptTemplate.from_messages(
@@ -141,7 +143,9 @@ qa_prompt = ChatPromptTemplate.from_messages(
         ("human", "{input}")
     ]
 )
-question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
 # Retrieval and Generative (RAG) Chain
 rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
@@ -149,9 +153,6 @@ rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chai
 # Chat message history storage
 store = {}
-def clean_temporary_data():
-    store.clear()
 def get_session_history(session_id: str) -> BaseChatMessageHistory:
     if session_id not in store:
         store[session_id] = ChatMessageHistory()
@@ -167,46 +168,59 @@ conversational_rag_chain = RunnableWithMessageHistory(
     output_messages_key="answer",
 )
-# Function to handle WebSocket connection
-@socketio.on('connect')
-def handle_connect():
-    print(f"Client connected: {request.sid}")
-    emit('connection_response', {'message': 'Connected successfully.'})
-# Function to handle WebSocket disconnection
-@socketio.on('disconnect')
-def handle_disconnect():
-    print(f"Client disconnected: {request.sid}")
-    clean_temporary_data()
-# Function to handle WebSocket messages
-@socketio.on('message')
-def handle_message(data):
-    question = data.get('question')
-    language = data.get('language')
-    if "en" in language:
-        language = "English"
-    else:
-        language = "Arabic"
-    session_id = data.get('session_id', SESSION_ID_DEFAULT)
-    chain = conversational_rag_chain.pick("answer")
     try:
-        for chunk in chain.stream(
-                {"input": question, 'language': language},
-                config={"configurable": {"session_id": session_id}},
-            ):
-            emit('response', chunk, room=request.sid)
-    except Exception as e:
-        print(f"Error during message handling: {e}")
-        emit('response', {"error": "An error occurred while processing your request."}, room=request.sid)
 # Home route
-@app.route("/")
-def index_view():
-    return render_template('chat.html')
-# Main function to run the app
-if __name__ == '__main__':
-    socketio.run(app, debug=True)

 import nltk
 nltk.download('punkt_tab')
 import os
 from dotenv import load_dotenv
 import asyncio
+from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from fastapi.middleware.cors import CORSMiddleware
 from langchain.chains import create_history_aware_retriever, create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_community.chat_message_histories import ChatMessageHistory
 from pinecone_text.sparse import BM25Encoder
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_community.retrievers import PineconeHybridSearchRetriever
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain_community.chat_models import ChatPerplexity
+from langchain.retrievers.document_compressors import CrossEncoderReranker
+from langchain_community.cross_encoders import HuggingFaceCrossEncoder
+from langchain_core.prompts import PromptTemplate
+import re
 # Load environment variables
 load_dotenv(".env")
 os.environ["GROQ_API_KEY"] = GROQ_API_KEY
 os.environ["TOKENIZERS_PARALLELISM"] = 'true'
+# Initialize FastAPI app and CORS
+app = FastAPI()
+origins = ["*"]  # Adjust as needed
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+templates = Jinja2Templates(directory="templates")
 # Function to initialize Pinecone connection
 def initialize_pinecone(index_name: str):
         print(f"Error initializing Pinecone: {e}")
         raise
 ##################################################
 ##          Change down here
 ##################################################
 # Initialize Pinecone index and BM25 encoder
+pinecone_index = initialize_pinecone("updated-traveler")
+bm25 = BM25Encoder().load("./updated-traveler.json")
 ##################################################
 ##################################################
 )
 # Initialize LLM
+llm = ChatPerplexity(temperature=0, pplx_api_key=GROQ_API_KEY, model="llama-3.1-sonar-large-128k-chat", max_tokens=512, max_retries=2)
 # Initialize Reranker
+model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
+compressor = CrossEncoderReranker(model=model, top_n=20)
 compression_retriever = ContextualCompressionRetriever(
     base_compressor=compressor, base_retriever=retriever
 )
 history_aware_retriever = create_history_aware_retriever(llm, compression_retriever, contextualize_q_prompt)
 # QA system prompt and chain
+qa_system_prompt = """ You are a highly skilled information retrieval assistant. Use the following context to answer questions effectively.
+If you don't know the answer, simply state that you don't know.
+Your answer should be in {language} language.
+When responding to queries, follow these guidelines:
+1. Provide Clear Answers:
+   - Based on the language of the question, you have to answer in that language. E.g., if the question is in English, then answer in English; if the question is in Arabic, you should answer in Arabic.
+   - Ensure the response directly addresses the query with accurate and relevant information.
+   - Do not give long answers. Provide detailed but concise responses.
+2. Formatting for Readability:
+   - Provide the entire response in proper markdown format.
+   - Use structured Maekdown elements such as headings, subheading, lists, tables, and links.
+   - Use emaphsis on headings, important texts and phrases.
+3. Proper Citations:
+   - ALWAYS USE INLINE CITATIONS with embed source URLs where users can verify information or explore further.
+   - The inline citations should be in the format [Source 1], [Source 2], etc. where on clicking on this the user should be redirected to the source url in a new tab.
+   - Do not inlcude references at the end of response.
+FOLLOW ALL THE GIVEN INSTRUCTIONS, FAILURE TO DO SO WILL RESULT IN TERMINATION OF THE CHAT.
+== CONTEXT ==
 {context}
 """
 qa_prompt = ChatPromptTemplate.from_messages(
         ("human", "{input}")
     ]
 )
+document_prompt = PromptTemplate(input_variables=["page_content", "source"], template="{page_content} \n\n Source: {source}")
+question_answer_chain = create_stuff_documents_chain(llm, qa_prompt, document_prompt=document_prompt)
 # Retrieval and Generative (RAG) Chain
 rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
 # Chat message history storage
 store = {}
 def get_session_history(session_id: str) -> BaseChatMessageHistory:
     if session_id not in store:
         store[session_id] = ChatMessageHistory()
     output_messages_key="answer",
 )
+# WebSocket endpoint with streaming
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    await websocket.accept()
+    print(f"Client connected: {websocket.client}")
+    session_id = None
     try:
+        while True:
+            data = await websocket.receive_json()
+            question = data.get('question')
+            language = data.get('language')
+            if "en" in language:
+                language = "English"
+            else:
+                language = "Arabic"
+            session_id = data.get('session_id', SESSION_ID_DEFAULT)
+            # Process the question
+            try:
+                # Define an async generator for streaming
+                async def stream_response():
+                    complete_response = ""
+                    context = {}
+                    async for chunk in conversational_rag_chain.astream(
+                        {"input": question, 'language': language},
+                        config={"configurable": {"session_id": session_id}}
+                    ):
+                        if "context" in chunk:
+                            context = chunk['context']
+                        # Send each chunk to the client
+                        if "answer" in chunk:
+                            complete_response += chunk['answer']
+                            await websocket.send_json({'response': chunk['answer']})
+                    if context:
+                        citations = re.findall(r'\[(\d+)\]', complete_response)
+                        citation_numbers = list(map(int, citations))
+                        sources = dict()
+                        for index, doc in enumerate(context):
+                            if (index+1) in citation_numbers:
+                                sources[f"[{index+1}]"] = doc.metadata["source"]
+                        await websocket.send_json({'sources': sources})
+                await stream_response()
+            except Exception as e:
+                print(f"Error during message handling: {e}")
+                await websocket.send_json({'response': "Something went wrong, Please try again.."})
+    except WebSocketDisconnect:
+        print(f"Client disconnected: {websocket.client}")
+        if session_id:
+            store.pop(session_id, None)
 # Home route
+@app.get("/", response_class=HTMLResponse)
+async def read_index(request: Request):
+    return templates.TemplateResponse("chat.html", {"request": request})

requirements.txt CHANGED Viewed

@@ -5,10 +5,9 @@ langchain-huggingface
 pinecone
 pinecone-text
 flashrank
-flask
-flask-cors
-flask-socketio
-gunicorn
-gevent
-gevent-websocket
-openai

 pinecone
 pinecone-text
 flashrank
+fastapi>=0.68.0
+uvicorn[standard]>=0.15.0
+websockets>=10.0
+python-multipart>=0.0.5
+openai
+einops

templates/chat.html CHANGED Viewed

@@ -4,24 +4,10 @@
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>Chat with LLM</title>
-    <link rel="stylesheet" href="../static/styles.css">
 </head>
 <body>
-    <h3>This is a RAG application over the website <a href="https://omkar0896.pythonanywhere.com/">https://omkar0896.pythonanywhere.com/</a></h3>
-    <div class="chat-container">
-        <div class="chat-box" id="chat-box">
-        </div>
-        <div class="input-container">
-            <input type="text" id="chat-input" placeholder="Type your message here...">
-            <button id="send-button">Send</button>
-        </div>
     </div>
-    <script src="https://cdn.socket.io/4.5.0/socket.io.min.js"></script>
-    <script src="https://cdnjs.cloudflare.com/ajax/libs/showdown/2.1.0/showdown.min.js" integrity="sha512-LhccdVNGe2QMEfI3x4DVV3ckMRe36TfydKss6mJpdHjNFiV07dFpS2xzeZedptKZrwxfICJpez09iNioiSZ3hA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
-    <script src="../static/script.js"></script>
 </body>
 </html>

     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <title>Chat with LLM</title>
 </head>
 <body>
+    <div style="width: 100%; height: 100vh; display: flex; align-items: center; justify-content: center;">
+        <h3>This is a demo</h3>
     </div>
 </body>
 </html>

updated-traveler.json ADDED Viewed

The diff for this file is too large to render. See raw diff