Arnavkumar01 committed on
Commit
d08081f
·
1 Parent(s): 0efb7cc

initial commit

Browse files
Files changed (4) hide show
  1. Dockerfile +33 -0
  2. docker-compose.yml +26 -0
  3. main.py +310 -0
  4. requirements.txt +16 -0
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 1. Start with a lean and official Python base image
2
+ FROM python:3.10-slim
3
+
4
+ # Install dependencies for psycopg2 (libpq-dev is still needed)
5
+ # ffmpeg is NO LONGER needed for this version
6
+ RUN apt-get update && apt-get install -y libpq-dev && rm -rf /var/lib/apt/lists/*
7
+
8
+ # 2. Set the working directory inside the container
9
+ WORKDIR /app
10
+
11
+ # 3. Create a non-root user and set up cache
12
+ RUN useradd -m -u 1000 user
13
+ RUN mkdir -p /app/.cache && chown -R user:user /app/.cache
14
+ ENV HF_HOME="/app/.cache"
15
+ USER user
16
+
17
+ # Add local bin directory to PATH
18
+ ENV PATH="/home/user/.local/bin:${PATH}"
19
+
20
+ # 4. Copy and install dependencies
21
+ COPY --chown=user:user requirements.txt .
22
+ RUN pip install --no-cache-dir -r requirements.txt
23
+
24
+ # 5. Copy the app source code
25
+ COPY --chown=user:user . .
26
+
27
+ # 6. Expose the port used by Hugging Face Spaces
28
+ EXPOSE 7860
29
+
30
+ # 7. Run the FastAPI app using Uvicorn
31
+ # This assumes your file is named "main.py". If you named it "browser_main.py",
32
+ # change "main:app" to "browser_main:app"
33
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
docker-compose.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # docker-compose.yml
2
+ services:
3
+ # This is the name of your service, you can call it anything
4
+ rag-api:
5
+ # Tells Docker Compose to build the image from the Dockerfile in the current directory (.)
6
+ build: .
7
+
8
+ # This is the magic part! It tells the service to load environment variables
9
+ # from the .env file in the same directory.
10
+ env_file:
11
+ - .env
12
+
13
+ # This maps port 8000 on your local machine to port 8000 inside the container.
14
+ # Your FastAPI app will be accessible at http://localhost:8000
15
+ ports:
16
+ - "8000:8000"
17
+
18
+ # This sets up a "volume" for live code reloading. Any changes you make in your
19
+ # local './app' folder will be instantly reflected inside the container's '/app/app'
20
+ # folder, so you don't have to rebuild the image for every code change.
21
+ volumes:
22
+ - ./app:/app/app
23
+
24
+ # Overrides the default command from the Dockerfile to enable --reload for development
25
+ # This makes Gunicorn restart automatically when you save a file.
26
+ command: gunicorn --bind 0.0.0.0:8000 --workers 1 --worker-class uvicorn.workers.UvicornWorker --timeout 0 --reload "main:app"
main.py ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import logging
4
+ import json
5
+ import re
6
+ from contextlib import asynccontextmanager
7
+ from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request, status, Depends, Header, HTTPException
8
+ from fastapi.concurrency import run_in_threadpool # This line is corrected (no syntax error)
9
+ from pydantic import BaseModel
10
+ from dotenv import load_dotenv
11
+ from openai import OpenAI
12
+ from elevenlabs.client import ElevenLabs
13
+ from langchain_huggingface import HuggingFaceEmbeddings
14
+ from langchain_postgres.vectorstores import PGVector
15
+ from sqlalchemy import create_engine
16
+ import asyncio
17
+ import io
18
+
19
+ # --- SETUP ---
20
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
21
+ logging.getLogger('tensorflow').setLevel(logging.ERROR)
22
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
23
+
24
+ # Load environment variables
25
+ load_dotenv()
26
+ NEON_DATABASE_URL = os.getenv("NEON_DATABASE_URL")
27
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
28
+ ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
29
+ SHARED_SECRET = os.getenv("SHARED_SECRET")
30
+
31
+ # --- CONFIGURATION ---
32
+ COLLECTION_NAME = "real_estate_embeddings"
33
+ EMBEDDING_MODEL = "hkunlp/instructor-large"
34
+ ELEVENLABS_VOICE_NAME = "Leo"
35
+ PLANNER_MODEL = "gpt-4o-mini"
36
+ ANSWERER_MODEL = "gpt-4o"
37
+ TABLE_DESCRIPTIONS = """
38
+ - "ongoing_projects_source": Details about projects currently under construction.
39
+ - "upcoming_projects_source": Information on future planned projects.
40
+ - "completed_projects_source": Facts about projects that are already finished.
41
+ - "historical_sales_source": Specific sales records, including price, date, and property ID.
42
+ - "past_customers_source": Information about previous customers.
43
+ - "feedback_source": Customer feedback and ratings for projects.
44
+ """
45
+
46
+ # --- GLOBAL VARIABLES & CLIENTS ---
47
+ embeddings = None
48
+ vector_store = None
49
+
50
+ client_openai = OpenAI(api_key=OPENAI_API_KEY)
51
+ client_elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)
52
+
53
+
54
+ # --- FASTAPI LIFESPAN MANAGEMENT ---
55
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    Startup loads the HuggingFace embedding model named by EMBEDDING_MODEL and
    binds a PGVector store for COLLECTION_NAME to a SQLAlchemy engine built
    from NEON_DATABASE_URL. Both objects are published through the
    module-level ``embeddings`` and ``vector_store`` globals. On exit the
    engine is disposed so its pooled connections are closed.

    Raises:
        RuntimeError: if NEON_DATABASE_URL is not configured.
    """
    global embeddings, vector_store

    # Fail before the (slow) model load with a message that names the cause.
    if not NEON_DATABASE_URL:
        raise RuntimeError("NEON_DATABASE_URL is not set; cannot connect to the vector store.")

    logging.info(f"Initializing embedding model: '{EMBEDDING_MODEL}'...")
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    logging.info("Embedding model loaded successfully.")

    logging.info(f"Connecting to vector store '{COLLECTION_NAME}'...")
    engine = create_engine(NEON_DATABASE_URL, pool_pre_ping=True)
    try:
        vector_store = PGVector(
            connection=engine,
            collection_name=COLLECTION_NAME,
            embeddings=embeddings,
        )
        logging.info("Successfully connected to the vector store.")
        yield
        logging.info("Application shutting down.")
    finally:
        # Release pooled connections even if startup or the app itself failed.
        engine.dispose()
73
+
74
+ # --- INITIALIZE FastAPI APP ---
75
+ app = FastAPI(lifespan=lifespan)
76
+
77
+ # --- PROMPTS ---
78
+ QUERY_FORMULATION_PROMPT = f"""
79
+ You are a query analysis agent. Your task is to transform a user's query into a precise search query for a vector database and determine the correct table to filter by.
80
+ **Available Tables:**
81
+ {TABLE_DESCRIPTIONS}
82
+ **User's Query:** "{{user_query}}"
83
+ **Your Task:**
84
+ 1. Rephrase the user's query into a clear, keyword-focused English question suitable for a database search.
85
+ 2. Analyze the user's query for keywords indicating project status (e.g., "ongoing", "under construction", "completed", "finished", "upcoming", "new launch").
86
+ 3. If such status keywords are present, identify the single most relevant table from the list above to filter by.
87
+ 4. If no specific status keywords are mentioned (e.g., the user asks generally about projects in a location), set the filter table to null.
88
+ 5. Respond ONLY with a JSON object containing "search_query" and "filter_table" (which should be the table name string or null).
89
+ """
90
+ ANSWER_SYSTEM_PROMPT = """
91
+ You are an expert AI assistant for a premier real estate developer.
92
+ ## YOUR PERSONA
93
+ - You are professional, helpful, and highly knowledgeable. Your tone should be polite and articulate.
94
+ ## CORE BUSINESS KNOWLEDGE
95
+ - **Operational Cities:** We are currently operational in Pune, Mumbai, Bengaluru, Delhi, Chennai, Hyderabad, Goa, Gurgaon, Kolkata.
96
+ - **Property Types:** We offer luxury apartments, villas, and commercial properties.
97
+ - **Budget Range:** Our residential properties typically range from 45 lakhs to 5 crores.
98
+ ## CORE RULES
99
+ 1. **Language Adaptation:** If the user's original query was in Hinglish, respond in Hinglish. If in English, respond in English.
100
+ 2. **Fact-Based Answers:** Use the provided CONTEXT to answer the user's question. If the context is empty, use your Core Business Knowledge.
101
+ 3. **Stay on Topic:** Only answer questions related to real estate.
102
+ """
103
+
104
+
105
+ # --- HELPER FUNCTIONS (to be run in threadpool) ---
106
+
107
def _guess_audio_extension(audio_bytes: bytes) -> str:
    """Return a file extension inferred from the leading magic bytes, or "audio" if unrecognised."""
    head = audio_bytes[:12]
    if head[:4] == b"RIFF" and head[8:12] == b"WAVE":
        return "wav"
    if head[:4] == b"\x1a\x45\xdf\xa3":  # EBML header (WebM / Matroska)
        return "webm"
    if head[:4] == b"OggS":
        return "ogg"
    if head[:4] == b"fLaC":
        return "flac"
    if head[4:8] == b"ftyp":  # ISO base media (MP4 / M4A)
        return "mp4"
    if head[:3] == b"ID3" or (len(head) >= 2 and head[0] == 0xFF and head[1] & 0xE0 == 0xE0):
        return "mp3"
    return "audio"


def transcribe_audio(audio_bytes: bytes) -> str:
    """Transcribe raw audio bytes to text with the "whisper-1" model.

    The upload is given a filename whose extension is sniffed from the audio
    container's magic bytes, falling back to the previous "input.audio" name
    when the container is not recognised.
    NOTE(review): the transcription endpoint appears to validate the upload
    by filename extension -- confirm against the OpenAI audio API docs.

    If the transcript contains Devanagari characters it is transliterated to
    Roman script through a chat completion.

    Args:
        audio_bytes: Encoded audio payload (e.g. WAV, MP3, WebM, Ogg).

    Returns:
        The stripped transcript, or "" when the input is empty or all three
        attempts fail. This function never raises.
    """
    if not audio_bytes:
        # Nothing to send; avoid three guaranteed-to-fail API calls.
        return ""

    filename = f"input.{_guess_audio_extension(audio_bytes)}"

    for attempt in range(3):
        try:
            audio_file = io.BytesIO(audio_bytes)
            audio_file.name = filename

            transcript = client_openai.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
            text = transcript.text

            # Check for Hindi script and transliterate
            if re.search(r'[\u0900-\u097F]', text):
                translit_prompt = f"Transliterate this Hindi text to Roman script (Hinglish style): {text}"
                response = client_openai.chat.completions.create(
                    model="gpt-4o-mini",
                    messages=[{"role": "user", "content": translit_prompt}],
                    temperature=0.0
                )
                text = response.choices[0].message.content

            # The chat API may return None content; treat that as empty.
            return (text or "").strip()
        except Exception as e:
            logging.error(f"Error during transcription (attempt {attempt+1}): {e}", exc_info=True)
    return ""
139
+
140
def generate_elevenlabs_sync(text: str, voice: str) -> bytes:
    """Synthesize speech for *text* with ElevenLabs (blocking; for run_in_threadpool).

    Args:
        text: The text to speak.
        voice: Voice identifier forwarded to the ElevenLabs client.

    Returns:
        The complete MP3 payload as bytes, or b'' when *text* is empty or all
        three attempts fail. This function never raises.
    """
    if not text:
        return b''

    for attempt in range(3):
        try:
            # NOTE(review): requirements pin elevenlabs==2.17.0; confirm that
            # `client.generate` still exists there (newer SDKs expose
            # `text_to_speech.convert`, which takes a voice_id) -- TODO verify.
            audio = client_elevenlabs.generate(
                text=text,
                voice=voice,
                model="eleven_multilingual_v2",
                output_format="mp3_44100_128"
            )
            if isinstance(audio, (bytes, bytearray)):
                return bytes(audio)
            # The SDK may hand back a lazy iterator of chunks. Drain it here so
            # (a) callers really get bytes they can base64-encode and
            # (b) streaming errors are raised inside this retry loop.
            return b"".join(audio)
        except Exception as e:
            logging.error(f"Error in ElevenLabs generate (attempt {attempt+1}): {e}", exc_info=True)
    return b''
154
+
155
+
156
+ # --- RAG/LLM FUNCTIONS (async) ---
157
+
158
async def formulate_search_plan(user_query: str) -> dict:
    """Ask the planner model to turn *user_query* into a vector-search plan.

    The planner is prompted with QUERY_FORMULATION_PROMPT and asked for a JSON
    object. Up to three attempts are made; if every attempt fails, a default
    plan that searches for the raw query with no table filter is returned.

    Returns:
        The parsed JSON plan, or the default
        ``{"search_query": user_query, "filter_table": None}``.
    """
    logging.info("Formulating search plan with Planner LLM...")
    max_attempts = 3
    attempt_no = 0
    while attempt_no < max_attempts:
        attempt_no += 1
        try:
            planner_messages = [
                {"role": "user", "content": QUERY_FORMULATION_PROMPT.format(user_query=user_query)}
            ]
            # The OpenAI client is synchronous, so run it off the event loop.
            completion = await run_in_threadpool(
                client_openai.chat.completions.create,
                model=PLANNER_MODEL,
                messages=planner_messages,
                response_format={"type": "json_object"},
                temperature=0.0,
            )
            raw_plan = completion.choices[0].message.content
            plan = json.loads(raw_plan)
            logging.info(f"Search plan received: {plan}")
            return plan
        except Exception as e:
            logging.error(f"Error in Planner LLM call (attempt {attempt_no}): {e}", exc_info=True)
    # All attempts failed: fall back to an unfiltered search on the raw query.
    return {"search_query": user_query, "filter_table": None}
176
+
177
async def get_agent_response(user_text: str) -> str:
    """Answer *user_text* using retrieval-augmented generation, with retries.

    Steps per attempt:
      1. Obtain a search plan (query + optional source-table filter).
      2. Run a k=3 similarity search, filtered when the plan names a table.
      3. If the filtered search found nothing, retry once without the filter.
      4. Send the system prompt, the retrieved context and the original
         question to the answerer model.

    Returns:
        The model's answer text, or a fixed apology string after three failed
        attempts. This function never raises.
    """
    for attempt in range(3):
        try:
            search_plan = await formulate_search_plan(user_text)
            # The planner may emit a null/empty search_query; fall back to the
            # raw user text rather than passing None to the vector store.
            search_query = search_plan.get("search_query") or user_text
            filter_table = search_plan.get("filter_table")

            search_filter = {"source_table": filter_table} if filter_table else {}
            if search_filter:
                logging.info(f"Applying initial filter: {search_filter}")

            # Run blocking DB call in threadpool
            retrieved_docs = await run_in_threadpool(
                vector_store.similarity_search,
                search_query, k=3, filter=search_filter
            )

            # Only broaden when a filter was actually applied; without one the
            # fallback would just repeat the identical query.
            if not retrieved_docs and search_filter:
                logging.info("Initial search returned no results. Performing a broader fallback search.")
                retrieved_docs = await run_in_threadpool(
                    vector_store.similarity_search,
                    search_query, k=3
                )

            context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
            logging.info(f"Retrieved Context (preview): {context_text[:500]}...")

            final_prompt_messages = [
                {"role": "system", "content": ANSWER_SYSTEM_PROMPT},
                {"role": "system", "content": f"Use the following CONTEXT to answer:\n{context_text}"},
                {"role": "user", "content": f"My original question was: '{user_text}'"}
            ]

            # Run blocking OpenAI call in threadpool
            final_response = await run_in_threadpool(
                client_openai.chat.completions.create,
                model=ANSWERER_MODEL,
                messages=final_prompt_messages
            )
            return final_response.choices[0].message.content
        except Exception as e:
            logging.error(f"Error in get_agent_response (attempt {attempt+1}): {e}", exc_info=True)
    return "Sorry, I couldn't generate a response. Please try again."
223
+
224
+ # --- AUTH / TEST ENDPOINT HELPERS ---
225
+
226
class TextQuery(BaseModel):
    """Request body accepted by the /test-text-query endpoint."""

    # Free-text question that is forwarded to the agent.
    query: str
228
+
229
async def verify_token(x_auth_token: str = Header(...)):
    """FastAPI dependency that checks the X-Auth-Token header against SHARED_SECRET.

    Raises:
        HTTPException: 401 when SHARED_SECRET is unset/empty or the supplied
            token does not match it.
    """
    import hmac  # local import: keeps this block self-contained

    # Compare in constant time so response timing does not leak how much of
    # the secret a guess got right. Encoding to bytes lets compare_digest
    # accept non-ASCII tokens instead of raising TypeError.
    token_ok = bool(SHARED_SECRET) and hmac.compare_digest(
        x_auth_token.encode("utf-8"),
        SHARED_SECRET.encode("utf-8"),
    )
    if not token_ok:
        logging.warning("Authentication failed for /test-text-query.")
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing authentication token")
    logging.info("Authentication successful for /test-text-query.")
235
+
236
+
237
+ # --- API ENDPOINTS ---
238
+
239
@app.post("/test-text-query", dependencies=[Depends(verify_token)])
async def test_text_query_endpoint(query: TextQuery):
    """Run a text question through the agent and return its answer.

    Guarded by the verify_token dependency. Responds with a JSON object of
    the form ``{"response": <answer text>}``.
    """
    question = query.query
    logging.info(f"Received text query: {question}")

    answer = await get_agent_response(question)
    logging.info(f"Generated text response: {answer}")

    return dict(response=answer)
246
+
247
+
248
@app.websocket("/browser-listen")
async def browser_websocket_endpoint(websocket: WebSocket):
    """Voice chat loop over a WebSocket.

    For each incoming JSON message carrying a base64 "audio" field, the audio
    is transcribed, answered by the agent, synthesized to speech, and sent
    back as ``{"text": <answer>, "audio": <base64 audio>}``. Messages with no
    usable audio, an empty transcript, an empty answer or empty synthesized
    audio are skipped without a reply. A single malformed message does not
    end the session.
    """
    await websocket.accept()
    logging.info("Browser client connected.")

    try:
        while True:
            # 1. Receive JSON message from browser
            message = await websocket.receive_json()
            # Valid JSON need not be an object; ignore anything else.
            audio_base64 = message.get("audio") if isinstance(message, dict) else None

            if not audio_base64:
                continue

            logging.info("Received audio blob from browser.")
            try:
                audio_bytes = base64.b64decode(audio_base64)
            except (ValueError, TypeError):
                # binascii.Error is a ValueError subclass; TypeError covers a
                # non-string payload. Skip the message, keep the connection.
                logging.warning("Discarding message with undecodable base64 audio.")
                continue

            # 2. Transcribe (blocking call, so run it in the threadpool)
            user_text = await run_in_threadpool(transcribe_audio, audio_bytes)
            if not user_text:
                logging.info("Transcription empty; skipping.")
                continue
            logging.info(f"User said: {user_text}")

            # 3. Get AI response
            agent_response_text = await get_agent_response(user_text)
            if not agent_response_text:
                logging.warning("Agent generated empty response.")
                continue
            logging.info(f"AI Responded (preview): {agent_response_text[:100]}...")

            # 4. Generate AI speech (blocking call, so run it in the threadpool)
            ai_audio_bytes = await run_in_threadpool(
                generate_elevenlabs_sync,
                agent_response_text,
                ELEVENLABS_VOICE_NAME
            )
            if not ai_audio_bytes:
                continue

            # 5. Send audio and text back to browser
            response_audio_base64 = base64.b64encode(ai_audio_bytes).decode('utf-8')

            await websocket.send_json({
                "text": agent_response_text,
                "audio": response_audio_base64
            })
            logging.info("Sent AI audio response back to browser.")

    except WebSocketDisconnect:
        logging.info("Browser client disconnected.")
    except Exception as e:
        logging.error(f"An error occurred in browser websocket: {e}", exc_info=True)
    finally:
        # Best-effort close: the socket may already be closed (e.g. after a
        # client disconnect), in which case closing again raises.
        try:
            await websocket.close()
        except Exception:
            pass
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.13
2
+ uvicorn==0.34.3
3
+ websockets==15.0.1
4
+ openai==2.3.0
5
+ elevenlabs==2.17.0
6
+ gunicorn==23.0.0
7
+ psycopg2-binary==2.9.10
8
+ pandas==2.2.3
9
+ pydub==0.25.1
10
+ python-dotenv==1.1.0
11
+ sentence-transformers==5.1.1
12
+ langchain-huggingface==0.3.1
13
+ langchain-postgres==0.0.15
14
+ langchain-openai==0.3.35
15
+ langdetect==1.0.9
16
+ SQLAlchemy==2.0.40