Deepti-AI committed on
Commit
4187281 · verified
1 Parent(s): 361a05c

Upload 5 files

Browse files
Files changed (6)
  1. .gitattributes +2 -0
  2. Database.xlsx +3 -0
  3. Dockerfile +38 -0
  4. main.py +581 -0
  5. static/robot.gif +3 -0
  6. templates/index.html +152 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Database.xlsx filter=lfs diff=lfs merge=lfs -text
+ static/robot.gif filter=lfs diff=lfs merge=lfs -text
Database.xlsx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1b2fa6e90d86a5446f7d192fe2b8060ce0dbcae7d4bfa11aa68b6e74774ba40
+ size 1364411
Dockerfile ADDED
@@ -0,0 +1,38 @@
+ FROM python:3.11-slim
+
+ # ---- Set environment variables (updated here only for HF compatibility) ----
+ ENV PYTHONUNBUFFERED=1
+ ENV TRANSFORMERS_CACHE=/app/.cache/transformers
+ ENV HF_HOME=/app/.cache/huggingface
+
+ # ---- Optional: define keys via secrets, not hardcoded ----
+ # ENV DEEPGRAM_API_KEY=your_key
+ # ENV OPENAI_API_KEY=your_key
+
+ # ---- Set working directory ----
+ WORKDIR /app
+
+ # ---- Create writable cache directories ----
+ RUN mkdir -p /app/.cache/transformers /app/.cache/huggingface
+ RUN chmod -R 777 /app/.cache
+
+ # ---- Install system dependencies ----
+ RUN apt-get update && apt-get install -y \
+     ffmpeg \
+     build-essential \
+     libsndfile1 \
+     && rm -rf /var/lib/apt/lists/*
+
+ # ---- Copy project files ----
+ COPY . /app
+
+ # ---- Install Python dependencies ----
+ RUN pip install --upgrade pip
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # ---- Expose port ----
+ EXPOSE 7860
+
+ # ---- Run FastAPI app ----
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,581 @@
+ import os
+ os.environ["TRANSFORMERS_CACHE"] = "/app/.cache/transformers"
+ os.environ["HF_HOME"] = "/app/.cache/huggingface"
+
+ import asyncio
+ import json
+ import math
+ import tempfile
+ import time
+ import wave
+ from collections import Counter
+
+ import numpy as np
+ import openai
+ import openai as _openai_mod
+ import pandas as pd
+ import requests
+ import uvicorn
+ from deepgram import Deepgram
+ from dotenv import load_dotenv
+ from fastapi import FastAPI, File, UploadFile
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import HTMLResponse, StreamingResponse
+ from fastapi.staticfiles import StaticFiles
+ from google.cloud import speech_v1p1beta1 as speech
+ from google.cloud import translate_v2 as translate
+ from openai import OpenAI
+ from pydub import AudioSegment
+ from sentence_transformers import SentenceTransformer
+
+ load_dotenv()
+
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+ openai.api_key = os.getenv("OPENAI_API_KEY")
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ app.mount("/static", StaticFiles(directory="static"), name="static")
+
+
+ @app.get("/", response_class=HTMLResponse)
+ async def serve_html():
+     with open("templates/index.html", "r", encoding="utf-8") as f:
+         html_content = f.read()
+     return HTMLResponse(content=html_content)
+
+
+ SYSTEM_PROMPT = '''
+ You are Kammi, a friendly, human-like medical voice assistant built by Facile AI Solutions, specializing in orthopedic surgery.
+ You assist customers specifically with knee replacement surgery queries, and you are the assistant of Dr. Sandeep, a highly experienced knee replacement surgeon.
+
+ Rules for your responses:
+
+ 1. *Context-driven answers only*: Answer strictly based on the provided context and previous conversation history. Do not use external knowledge. Respond in *Telugu*.
+
+ 2. *General conversation*: Engage in greetings and casual conversation. If the user mentions their name, greet them personally using their name, in Telugu.
+
+ 3. *Technical/medical queries*:
+    Answer in *Telugu* based on the context or chat history.
+
+    - If the question is *relevant to knee replacement surgery* and the answer is in the context or chat history, provide the answer in *Telugu*.
+    - If the question is *relevant but not present in the context*, respond: "దయచేసి డాక్టర్ సందీప్ లేదా రిసెప్షన్ ను సంప్రదించండి."
+
+    - Translate medical and technical terms into simple, *understandable conversational Telugu* wherever possible.
+    - The output must be in Telugu script, but common English medical or technical terms (like knee, hip, surgery, replacement, physiotherapy, arthritis, etc.)
+      should be written phonetically in Telugu, not translated literally.
+
+      Example:
+      “knee replacement” → మోకాలు రీప్లేస్‌మెంట్
+      “hip replacement” → హిప్ రీప్లేస్‌మెంట్
+      “surgery” → సర్జరీ
+      “physiotherapy” → ఫిజియోథెరపీ
+      Ensure the language sounds simple, natural, and conversational for Telugu-speaking patients.
+
+ 4. *Irrelevant queries*:
+    - If the question is completely unrelated to knee replacement surgery, politely decline in Telugu: "నేను కేవలం మోకాలు రీప్లేస్‌మెంట్ సర్జరీ సంబంధిత ప్రశ్నలకు సహాయం చేస్తాను."
+
+ 5. *Drive conversation*:
+    - After answering the user's question, suggest a follow-up question from the context that you can answer, in *Telugu*.
+    - Make the follow-up natural and conversational. The follow-up question must be relevant to the current question or response, in *Telugu*.
+    - Do not include redundant statements like “మీరు మరింత సమాచారం కావాలనుకుంటే…” or "మీరు మరేదైనా అడగాలనుకుంటున్నారా" in the follow-up questions.
+    - If the user responds with a confirmation like “yes” or “okay”, give the answer for the previous follow-up question from the context.
+    - If the user ends the conversation, do not ask or suggest any follow-up question.
+
+ 6. *Readable voice output for TTS*:
+    - Break sentences at natural punctuation: , . ? ! : ;
+    - Do not use #, **, or other markdown symbols.
+    Telugu output guidelines:
+    All numbers, decimals, and points must be fully spelled out in Telugu words.
+    Example: 2.5 lakh → రెండు లక్షల యాభై వేల రూపాయలు
+
+ 7. *Concise and human-like*:
+    - Keep answers short, conversational, and natural, in *Telugu*.
+    - Maximum 40 words / ~20 seconds of speech.
+
+ 8. *Tone and style*:
+    - Helpful, friendly, approachable, and human-like.
+    - Maintain professionalism while being conversational.
+
+ 9. *About Dr. Sandeep*:
+    - All the points below must be given in *Telugu*.
+    - Over 5 years of experience in orthopedic and joint replacement surgery.
+    - Specializes in total and partial knee replacement procedures.
+    - Known for a patient-friendly approach, focusing on pre-surgery preparation, post-surgery rehabilitation, and pain management.
+    - Actively keeps up to date with the latest techniques and technologies in knee replacement surgery.
+    - Highly approachable and prefers that patients are well-informed about their treatment options and recovery process.
+
+ Always provide readable, streaming-friendly sentences in *Telugu* so the TTS can read smoothly. Drive conversation forward while staying strictly on knee replacement surgery topics, and suggest follow-up questions for which you have context-based answers.
+ '''
+
+ chat_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
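+ # Classic BM25 lexical ranking. For a query q and a document d:
+ #   score(q, d) = sum over query terms t of
+ #     IDF(t) * f(t, d) * (k1 + 1) / (f(t, d) + k1 * (1 - b + b * |d| / avgdl))
+ # where f(t, d) is the term frequency of t in d, |d| is the document length,
+ # avgdl the average document length, and IDF(t) = ln((N - df + 0.5) / (df + 0.5) + 1).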
+ class BM25:
+     def __init__(self, corpus, k1=1.2, b=0.75):
+         self.corpus = [doc.split() if isinstance(doc, str) else doc for doc in corpus]
+         self.k1 = k1
+         self.b = b
+         self.N = len(self.corpus)
+         self.avgdl = sum(len(doc) for doc in self.corpus) / self.N
+         self.doc_freqs = self._compute_doc_frequencies()
+         self.idf = self._compute_idf()
+
+     def _compute_doc_frequencies(self):
+         """Count how many documents contain each term"""
+         df = {}
+         for doc in self.corpus:
+             unique_terms = set(doc)
+             for term in unique_terms:
+                 df[term] = df.get(term, 0) + 1
+         return df
+
+     def _compute_idf(self):
+         """Compute the IDF for each term in the corpus"""
+         idf = {}
+         for term, df in self.doc_freqs.items():
+             idf[term] = math.log((self.N - df + 0.5) / (df + 0.5) + 1)
+         return idf
+
+     def score(self, query, document):
+         """Compute the BM25 score for one document and one query"""
+         query_terms = query.split() if isinstance(query, str) else query
+         doc_terms = document.split() if isinstance(document, str) else document
+         score = 0.0
+         freqs = Counter(doc_terms)
+         doc_len = len(doc_terms)
+
+         for term in query_terms:
+             if term not in freqs:
+                 continue
+             f = freqs[term]
+             idf = self.idf.get(term, 0)
+             denom = f + self.k1 * (1 - self.b + self.b * doc_len / self.avgdl)
+             score += idf * (f * (self.k1 + 1)) / denom
+         return score
+
+     def rank(self, query):
+         """Rank all documents for a given query"""
+         return [(i, self.score(query, doc)) for i, doc in enumerate(self.corpus)]
+
+
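+ # Hybrid relevance scoring: raw BM25 scores are unbounded, so they are first
+ # squashed into (0, 1) with a shifted sigmoid, then combined with the cosine
+ # similarity of the sentence embeddings as a weighted sum:
+ #   combined = sem_weight * cosine(q, d) + syn_weight * sigmoid(bm25(q, d) - midpoint)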
+ def sigmoid_scaled(x, midpoint=3.0):
+     """
+     Sigmoid function with shifting.
+     `midpoint` controls where the output is 0.5.
+     """
+     return 1 / (1 + math.exp(-(x - midpoint)))
+
+
+ def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
+     return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+
+ async def compute_similarity(query: str, query_embedding: np.ndarray, chunk_text: str, chunk_embedding: np.ndarray, sem_weight: float, syn_weight: float, bm25) -> float:
+     semantic_score = cosine_similarity(query_embedding, chunk_embedding)
+
+     # syntactic_score = fuzz.ratio(query, chunk_text) / 100.0
+     syntactic_score = bm25.score(query, chunk_text)
+     final_syntactic_score = sigmoid_scaled(syntactic_score)
+
+     combined_score = sem_weight * semantic_score + syn_weight * final_syntactic_score
+
+     return combined_score
+
+
+ async def retrieve_top_k_hybrid(query, k, sem_weight, syn_weight, bm25):
+     query_embedding = model.encode(query)
+
+     tasks = [
+         compute_similarity(query, query_embedding, row["Chunks"], row["Embeddings"], sem_weight, syn_weight, bm25)
+         for _, row in df_expanded.iterrows()
+     ]
+
+     similarities = await asyncio.gather(*tasks)
+
+     df_expanded["similarity"] = similarities
+
+     top_results = df_expanded.sort_values(by="similarity", ascending=False).head(k)
+
+     return top_results["Chunks"].to_list()
+
+
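+ # Load the embedding model and the knowledge base. Database.xlsx is expected to
+ # hold one chunk per row: a "Chunks" text column plus an "Embeddings" column of
+ # JSON-encoded vectors (presumably precomputed with the same embedding model).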
+ os.makedirs("/tmp/transformers_cache", exist_ok=True)
+
+ model = SentenceTransformer("abhinand/MedEmbed-large-v0.1")
+ df_expanded = pd.read_excel("Database.xlsx")  # Replace with your filename
+ df_expanded["Embeddings"] = df_expanded["Embeddings"].map(json.loads)
+ corpus = df_expanded['Chunks'].to_list()
+ bm25 = BM25(corpus)
+
+
+ # --- gTTS helper: stream raw audio file in small chunks ---
+ # def tts_chunk_stream(text_chunk: str, lang: str = "en"):
+ #     if not text_chunk.strip():
+ #         return []
+
+ #     tts = gTTS(text=text_chunk, lang=lang)
+ #     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
+ #     tts.save(temp_file.name)
+
+ #     def audio_stream():
+ #         try:
+ #             with open(temp_file.name, "rb") as f:
+ #                 chunk = f.read(1024)
+ #                 while chunk:
+ #                     yield chunk
+ #                     chunk = f.read(1024)
+ #         finally:
+ #             try:
+ #                 os.remove(temp_file.name)
+ #             except Exception:
+ #                 pass
+
+ #     return audio_stream()
+
+
+ def tts_chunk_stream(text_chunk: str, lang: str = "en"):
+     """
+     REST-based OpenAI TTS fallback for older openai SDKs (e.g. 0.28).
+     Returns a generator yielding MP3 byte chunks (1024 bytes).
+     """
+     if not text_chunk or not text_chunk.strip():
+         return []
+
+     # Map short lang -> locale (extend if needed)
+     language_map = {
+         "en": "en-US",
+         "en-US": "en-US",
+         "en-GB": "en-GB",
+         "hi": "hi-IN",
+     }
+     language_code = language_map.get(lang, "en-GB")
+
+     # TTS model & voice choice
+     model = "gpt-4o-mini-tts"  # or "tts-1"
+     voice = "alloy"  # alloy, verse, shimmer, echo, coral
+     fmt = "mp3"
+
+     # Resolve API key (prefer openai.api_key if available)
+     api_key = None
+     try:
+         # if you set openai.api_key earlier in your code, prefer it
+         api_key = getattr(_openai_mod, "api_key", None)
+     except Exception:
+         api_key = None
+
+     if not api_key:
+         api_key = os.getenv("OPENAI_API_KEY")
+
+     if not api_key:
+         print("OpenAI API key not found. Set openai.api_key or env var OPENAI_API_KEY.")
+         return []
+
+     url = "https://api.openai.com/v1/audio/speech"
+     headers = {
+         "Authorization": f"Bearer {api_key}",
+         "Content-Type": "application/json",
+     }
+
+     payload = {
+         "model": model,
+         "voice": voice,
+         "input": text_chunk,
+         "response_format": fmt,
+         # "instructions": "speak in a cheerful and positive tone"
+         # optional: "language": language_code  # include if needed by API variation
+     }
+
+     try:
+         # Use stream=True so we can yield bytes progressively.
+         resp = requests.post(url, headers=headers, json=payload, stream=True, timeout=60)
+     except Exception as e:
+         print("OpenAI TTS request failed:", e)
+         return []
+
+     if resp.status_code != 200:
+         # Try to show a helpful error message
+         try:
+             err = resp.json()
+         except Exception:
+             err = resp.text
+         print(f"OpenAI TTS REST call failed {resp.status_code}: {err}")
+         try:
+             resp.close()
+         except Exception:
+             pass
+         return []
+
+     # At this point resp.iter_content yields raw mp3 bytes
+     def audio_stream():
+         try:
+             for chunk in resp.iter_content(chunk_size=1024):
+                 if chunk:
+                     yield chunk
+         finally:
+             try:
+                 resp.close()
+             except Exception:
+                 pass
+     return audio_stream()
+
+
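+ # RAG step: retrieve the top-15 chunks for the (translated) user query with the
+ # hybrid scorer (semantic weight 0.9, lexical weight 0.1) and append them as
+ # context to the running chat history before the LLM call.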
+ async def get_rag_response(user_message: str):
+     global chat_messages
+     chunks = await retrieve_top_k_hybrid(user_message, 15, 0.9, 0.1, bm25)
+     context = "======================================================================================================\n".join(chunks)
+     chat_messages.append({"role": "user", "content": f'''
+ Context : {context}
+ User Query: {user_message}'''})
+     # print("chat_messages", chat_messages)
+     return chat_messages
+
+
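+ # Stream GPT output and synthesize speech incrementally: tokens accumulate in a
+ # small buffer that is flushed to TTS once it reaches ~30 characters and ends at
+ # a punctuation mark, so audio playback can begin while the model is still
+ # generating the rest of the answer.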
+ # --- GPT + TTS async generator with a small sentence buffer ---
+ async def gpt_tts_stream(prompt: str):
+     # start_time = time.time()
+     # print("started gpt_tts_stream", prompt)
+     global chat_messages
+     chat_messages = await get_rag_response(prompt)
+     # print(chat_messages, "chat_messages after getting RAG response")
+
+     # response = openai.ChatCompletion.create(
+     #     model="gpt-4o",
+     #     messages=chat_messages,
+     #     stream=True
+     # )
+
+     bot_response = ""
+     buffer = ""
+     buffer_size = 30
+
+     # ✅ Must use the `with` block for streaming
+     with client.chat.completions.stream(
+         model="gpt-4o-mini",
+         messages=chat_messages,
+     ) as stream:
+
+         for event in stream:
+             if event.type == "content.delta":
+                 delta = event.delta
+                 bot_response = bot_response + delta
+                 buffer += delta
+                 if len(buffer) >= buffer_size and buffer.endswith((".", "!", ",", "?", "\n", ";", ":")):
+                     print("the buffer is")
+                     print(buffer)
+                     for audio_chunk in tts_chunk_stream(buffer):
+                         yield audio_chunk
+                     buffer = ""
+
+             elif event.type == "content.done":
+                 # 🧾 model finished — flush whatever is left
+                 if buffer.strip():
+                     print("the leftover message")
+                     print(buffer.strip())
+                     for audio_chunk in tts_chunk_stream(buffer):
+                         yield audio_chunk
+                     buffer = ""
+
+     bot_response = bot_response.strip()
+     chat_messages.append({"role": "assistant", "content": bot_response})
+
+
+
+ # def convert_to_mono16_wav_bytes(audio_bytes: bytes) -> tuple[bytes, int]:
+ #     """Convert any uploaded audio (mp3/webm/wav) to mono 16-bit WAV bytes in memory."""
+ #     print("i am inside the mono16 conversion")
+ #     audio = AudioSegment.from_file(io.BytesIO(audio_bytes))
+
+ #     # Convert to mono
+ #     if audio.channels != 1:
+ #         audio = audio.set_channels(1)
+
+ #     # Convert to 16-bit PCM
+ #     if audio.sample_width != 2:
+ #         audio = audio.set_sample_width(2)
+
+ #     # Standardize sample rate to 16 kHz (required by Google STT)
+ #     if audio.frame_rate != 16000:
+ #         audio = audio.set_frame_rate(16000)
+
+ #     # Export as in-memory WAV bytes
+ #     wav_buffer = io.BytesIO()
+ #     audio.export(wav_buffer, format="wav")
+ #     wav_bytes = wav_buffer.getvalue()
+ #     print("mono 16 conversion done successfully")
+ #     return wav_bytes, 16000
+
+
+ # ------------------------------------------------------------------
+ # 2️⃣ Telugu STT (Speech-to-Text)
+ # ------------------------------------------------------------------
+ # def transcribe_telugu_audio(audio_bytes: bytes) -> tuple[str, float]:
+ #     print("i am inside the stt (telugu to telugu)")
+ #     wav_bytes, sample_rate = convert_to_mono16_wav_bytes(audio_bytes)
+ #     print("mono 16 conversion done successfully and fetched")
+ #     client = speech.SpeechClient()
+ #     print("client called successfully")
+ #     audio = speech.RecognitionAudio(content=wav_bytes)
+ #     print("audio created successfully")
+ #     config = speech.RecognitionConfig(
+ #         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
+ #         sample_rate_hertz=sample_rate,
+ #         language_code="te-IN",
+ #         enable_automatic_punctuation=True,
+ #     )
+
+ #     print("\n🔊 Transcribing Telugu audio...")
+ #     start_time = time.time()
+ #     response = client.recognize(config=config, audio=audio)
+
+ #     telugu_text = " ".join(
+ #         [result.alternatives[0].transcript for result in response.results]
+ #     )
+ #     stt_time = time.time() - start_time
+ #     print(f"✅ Telugu STT completed in {stt_time:.2f} seconds")
+
+ #     return telugu_text.strip(), stt_time
+
+
+ # ------------------------------------------------------------------
+ # 3️⃣ Telugu → English Translation
+ # ------------------------------------------------------------------
+ # def translate_to_english(te_text: str) -> tuple[str, float]:
+ #     translate_client = translate.Client()
+ #     print("\n🌐 Translating to English...")
+ #     start_time = time.time()
+
+ #     result = translate_client.translate(te_text, target_language="en")
+ #     english_text = result["translatedText"]
+
+ #     translation_time = time.time() - start_time
+ #     print(f"✅ Translation completed in {translation_time:.2f} seconds")
+ #     return english_text, translation_time
+
+
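+ # Voice pipeline: webm upload -> OpenAI STT (Telugu) -> Telugu-to-English
+ # translation -> RAG + GPT answer in Telugu -> streamed MP3 TTS back to the browser.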
+ @app.post("/chat_stream")
+ async def chat_stream(file: UploadFile = File(...)):
+     start_time = time.time()
+     audio_bytes = await file.read()
+     print("audio file read")
+
+     transcription = client.audio.transcriptions.create(
+         model="gpt-4o-transcribe",  # or "gpt-4o-mini-transcribe"
+         file=(file.filename, audio_bytes)  # important: (filename, bytes)
+     )
+
+     telugu_text = transcription.text
+     print(f"the text is : {telugu_text}")
+     print(f"stt time : {time.time() - start_time}")
+     start_time = time.time()
+     translation = client.responses.create(
+         model="gpt-4o-mini",
+         temperature=0,
+         top_p=0,
+         input=f'''Translate the following Telugu text into English:
+ {telugu_text}
+ Give only the English translation; these are most probably queries from the user''')
+
+     english_text = translation.output[0].content[0].text
+     print(f"translation time {time.time() - start_time}")
+     print(f"the english text is {english_text}")
+
+     return StreamingResponse(gpt_tts_stream(english_text), media_type="audio/mpeg")
+
+
+ @app.post("/reset_chat")
+ async def reset_chat():
+     global chat_messages
+     chat_messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+     return {"message": "Chat history reset successfully."}
static/robot.gif ADDED

Git LFS Details

  • SHA256: 0b4f7a07ff399ab418854bbfd68329c06699abacced0ff6b5df451e01a375bab
  • Pointer size: 132 Bytes
  • Size of remote file: 1.02 MB
templates/index.html ADDED
@@ -0,0 +1,152 @@
+ <!doctype html>
+ <html>
+ <head>
+   <meta charset="utf-8" />
+   <title>Ortho Buddy — Voice</title>
+   <style>
+     html,body { height:100%; margin:0; font-family: "Segoe UI", Roboto, Arial; background: radial-gradient(circle at 50% 10%, #5ca4b0, #062219); color:#bfe6c9; }
+     .container { width:360px; margin:40px auto; text-align:center; }
+     h1 { letter-spacing:4px; font-size:36px; color:#2de08b; text-shadow:0 6px 30px rgba(0,0,0,0.7); margin:20px 0; }
+     .subtitle { color:#d6f3de; margin-bottom:10px; font-size:24px; }
+     .robot { width:270px; height:270px; background: url('/static/robot.gif') center/contain no-repeat; margin: 40px auto; border-radius:8px; }
+     .control { margin-top:20px; }
+     .record-btn { width:50px; height:50px; border-radius:60px; border:none; background:linear-gradient(rgb(248, 245, 248), rgb(248, 245, 248)); box-shadow: 0 10px 30px rgba(0,0,0,0.6); color:white; font-size:18px; cursor:pointer; }
+     .record-btn.recording { background: linear-gradient(#ff6666, #cc2222); box-shadow: 0 10px 30px rgba(0,0,0,0.7); }
+     .heading {
+       text-align: center;
+     }
+     .reset-btn {
+       position: fixed;
+       top: 20px;
+       right: 20px;
+       padding: 10px 20px;
+       background: linear-gradient(#2de08b, #0a8f5c);
+       border: none;
+       border-radius: 6px;
+       font-size: 16px;
+       font-weight: bold;
+       color: #062219;
+       cursor: pointer;
+       box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4);
+       z-index: 1000;
+       transition: background 0.3s ease;
+     }
+     .reset-btn:hover {
+       background: linear-gradient(#1bc47a, #07734f);
+     }
+   </style>
+ </head>
+ <body>
+   <h1 class="heading">Summit Clinics</h1>
+   <div class="container">
+
+     <div class="subtitle"><b>KAMMI</b></div>
+
+     <div class="robot" id="robotImg"></div>
+
+     <div class="control">
+       <button id="recBtn" class="record-btn">🎤</button>
+     </div>
+
+     <button id="resetBtn" class="reset-btn">New User</button>
+
+     <div class="text-stream" id="textStream"></div>
+
+     <!-- Hidden audio player for streamed voice -->
+     <audio id="player" controls autoplay hidden></audio>
+   </div>
+
+   <script>
+     let mediaRecorder;
+     let audioChunks = [];
+     let recBtn = document.getElementById("recBtn");
+     let textStream = document.getElementById("textStream");
+     let recording = false;
+     let player = document.getElementById("player");
+
+     document.getElementById("resetBtn").addEventListener("click", async () => {
+       try {
+         const response = await fetch("/reset_chat", {
+           method: "POST"
+         });
+         if (response.ok) {
+           showTempMessage("Please proceed.", "lightgreen");
+         } else {
+           const errorText = await response.text();
+           showTempMessage("Reset failed: " + errorText, "#ffb3b3");
+         }
+       } catch (error) {
+         showTempMessage("Reset error: " + error.message, "#ffb3b3");
+       }
+     });
+
+     // Utility function to show a message for 2 seconds
+     function showTempMessage(msg, color) {
+       const msgDiv = document.createElement("div");
+       msgDiv.style.color = color;
+       msgDiv.textContent = msg;
+       textStream.appendChild(msgDiv);
+       setTimeout(() => {
+         msgDiv.remove();
+       }, 2000);
+     }
+
+     recBtn.addEventListener("click", async () => {
+       if (!recording) {
+         await startRecording();
+       } else {
+         stopRecordingAndSend();
+       }
+       recording = !recording;
+       recBtn.textContent = recording ? "Stop" : "🎤";
+       recBtn.classList.toggle("recording", recording);
+     });
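+     // Capture microphone audio with the MediaRecorder API: chunks accumulate
+     // in audioChunks until the user presses Stop, then they are bundled into a
+     // single webm Blob and posted to /chat_stream.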
+     async function startRecording() {
+       textStream.innerHTML = ""; // clear previous
+       audioChunks = [];
+       if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+         alert("Your browser does not support microphone capture.");
+         return;
+       }
+       const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+       mediaRecorder = new MediaRecorder(stream);
+       mediaRecorder.ondataavailable = e => {
+         if (e.data && e.data.size > 0) audioChunks.push(e.data);
+       };
+       mediaRecorder.start();
+     }
+
+     function stopRecordingAndSend() {
+       if (!mediaRecorder) return;
+       // Register the onstop handler before calling stop(), so it is
+       // guaranteed to be in place when the stop event fires.
+       mediaRecorder.onstop = async () => {
+         const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+         const form = new FormData();
+         form.append("file", audioBlob, "recording.webm");
+         // fetch streaming audio directly
+         console.log("Sending audio to server...");
+         const resp = await fetch("/chat_stream", {
+           method: "POST",
+           body: form,
+         });
+         if (!resp.ok) {
+           const txt = await resp.text();
+           textStream.innerHTML += "<div style='color:#ffb3b3'>Server error: " + txt + "</div>";
+           return;
+         }
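+         // Stream the MP3 response progressively: MediaSource lets playback
+         // start before the full body has arrived; each fetched chunk is
+         // appended to the SourceBuffer, waiting for 'updateend' between appends.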
+         // create an object URL from the streaming response
+         const mediaSource = new MediaSource();
+         player.src = URL.createObjectURL(mediaSource);
+         mediaSource.addEventListener('sourceopen', async () => {
+           const sourceBuffer = mediaSource.addSourceBuffer('audio/mpeg');
+           const reader = resp.body.getReader();
+           while (true) {
+             const { done, value } = await reader.read();
+             if (done) break;
+             sourceBuffer.appendBuffer(value);
+             await new Promise(resolve => sourceBuffer.addEventListener('updateend', resolve, { once: true }));
+           }
+           mediaSource.endOfStream();
+         });
+         player.play();
+       };
+       mediaRecorder.stop();
+     }
+   </script>
+ </body>
+ </html>