aaditkumar committed on
Commit
5d7e1ed
·
verified ·
1 Parent(s): 48ab4ef

Upload 28 files

Browse files
app/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ J.A.R.V.I.S APPLICATION PACKAGE
3
+ ===============================
4
+
5
+ This directory is the main Python package for the J.A.R.V.I.S backend.
6
+ The presence of __init__.py makes Python treat 'app' as a package, so you can:
7
+
8
+ from app.main import app
9
+ from app.models import ChatRequest
10
+ from app.services.chat_service import ChatService
11
+
12
+ FILE STRUCTURE:
13
+ app/
14
+ __init__.py - This file; marks 'app' as a package.
15
+ main.py - FastAPI app and all HTTP endpoints (/chat, /chat/realtime, /health, etc.).
16
+ models.py - Pydantic models for API requests, responses, and internal chat storage.
17
+ services/ - Business logic: chat sessions, Groq LLM, realtime (Tavily + Groq), vector store.
18
+ utils/ - Helpers: retry with backoff, current date/time for the LLM prompt.
19
+ """
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (928 Bytes). View file
 
app/__pycache__/generate_thinking_audio.cpython-312.pyc ADDED
Binary file (3.27 kB). View file
 
app/__pycache__/main.cpython-312.pyc ADDED
Binary file (30.9 kB). View file
 
app/__pycache__/models.cpython-312.pyc ADDED
Binary file (1.53 kB). View file
 
app/generate_thinking_audio.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import sys
3
+ from pathlib import Path
4
+
5
# Project root is the parent of the app/ directory; clips land in frontend/audio/.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
AUDIO_DIR = PROJECT_ROOT / "frontend" / "audio"

# (file_stem, spoken_text) pairs; each is rendered to AUDIO_DIR/<stem>.mp3.
STARTER_PHRASES = [
    ("starter_1", "One moment please."),
    ("starter_2", "Sure, one moment."),
    ("starter_3", "Got it, hold on."),
    ("starter_4", "On it right now."),
    ("starter_5", "Alright, give me a sec."),
    ("starter_6", "Right, one moment."),
    ("starter_7", "Okay, hold on."),
    ("starter_8", "One second please."),
    ("starter_9", "Give me a moment."),
    ("starter_10", "Just a moment please."),
]

PHRASES = STARTER_PHRASES
# edge-tts voice and speaking-rate offset applied to every clip.
VOICE = "en-GB-RyanNeural"
RATE = "+15%"
24
+
25
async def generate_one(name: str, text: str) -> bool:
    """Synthesize *text* to AUDIO_DIR/<name>.mp3; return True on success."""
    try:
        import edge_tts
    except ImportError:
        # edge-tts missing: the caller counts this file as failed.
        return False
    target = AUDIO_DIR / f"{name}.mp3"
    try:
        tts = edge_tts.Communicate(text, VOICE, rate=RATE)
        await tts.save(str(target))
    except Exception as e:
        print(f" [FAIL] {name}.mp3: {e}")
        return False
    print(f" [OK] {name}.mp3")
    return True
39
+
40
async def main():
    """Regenerate every starter phrase as MP3; return a process exit code."""
    try:
        import edge_tts  # availability probe only
    except ImportError:
        print("edge-tts not installed. Run: pip install edge-tts")
        return 1
    AUDIO_DIR.mkdir(parents=True, exist_ok=True)
    # Old follow-up clips are obsolete; remove them best-effort.
    for stale in AUDIO_DIR.glob("followup_*.mp3"):
        try:
            stale.unlink()
            print(f" [REMOVED] {stale.name}")
        except OSError:
            pass
    print(f"Generating thinking audio in {AUDIO_DIR}...")
    generated = 0
    for name, text in PHRASES:
        if await generate_one(name, text):
            generated += 1
    print(f"Done: {generated}/{len(PHRASES)} files.")
    return 0 if generated == len(PHRASES) else 1
60
+
61
# Script entry point: run the async generator and exit with its status code.
if __name__ == "__main__":
    try:
        exit_code = asyncio.run(main())
    except KeyboardInterrupt:
        # 130 = 128 + SIGINT(2), the conventional exit status for Ctrl-C.
        exit_code = 130
    sys.exit(exit_code)
app/main.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from fastapi import FastAPI, HTTPException
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.responses import StreamingResponse, RedirectResponse
5
+ from fastapi.staticfiles import StaticFiles
6
+ from starlette.middleware.base import BaseHTTPMiddleware
7
+ from starlette.requests import Request
8
+ from contextlib import asynccontextmanager
9
+ import uvicorn
10
+ import logging
11
+ import json
12
+ import time
13
+ import re
14
+ import base64
15
+ import asyncio
16
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
17
+ import edge_tts
18
+ from app.models import ChatRequest, ChatResponse, TTSRequest
19
+
20
# User-facing detail string returned with HTTP 429 when a Groq quota error is detected.
RATE_LIMIT_MESSAGE = (
    "You've reached your daily API limit for this assistant. "
    "Your credits will reset in a few hours, or you can upgrade your plan for more. "
    "Please try again later."
)
25
+
26
+ def _is_rate_limit_error(exc: Exception) -> bool:
27
+ msg = str(exc).lower()
28
+ return "429" in str(exc) or "rate limit" in msg or "tokens per day" in msg
29
+
30
+ from app.services.vector_store import VectorStoreService
31
+ from app.services.groq_service import GroqService, AllGroqApisFailedError
32
+ from app.services.realtime_service import RealtimeGroqService
33
+ from app.services.chat_service import ChatService
34
+ from app.services.brain_service import BrainService
35
+ from config import (
36
+ VECTOR_STORE_DIR, GROQ_API_KEYS, GROQ_MODEL, TAVILY_API_KEY,
37
+ EMBEDDING_MODEL, CHUNK_SIZE, CHUNK_OVERLAP, MAX_CHAT_HISTORY_TURNS,
38
+ ASSISTANT_NAME, TTS_VOICE, TTS_RATE,
39
+ )
40
+
41
# Root logging: timestamp plus aligned level/name columns for readable startup logs.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)-8s | %(name)-20s | %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger("J.A.R.V.I.S")
47
+
48
# Service singletons, populated by lifespan() at startup; None until then.
vector_store_service: VectorStoreService | None = None
groq_service: GroqService | None = None
realtime_service: RealtimeGroqService | None = None
brain_service: BrainService | None = None
chat_service: ChatService | None = None
53
+
54
def print_title():
    """Print the J.A.R.V.I.S ASCII-art banner to stdout (shown once at startup)."""
    title = r"""
╔══════════════════════════════════════════════════════════╗
║ ║
║ ██╗ █████╗ ██████╗ ██╗ ██╗██╗███████╗ ║
║ ██║██╔══██╗██╔══██╗██║ ██║██║██╔════╝ ║
║ ██║███████║██████╔╝██║ ██║██║███████╗ ║
║ ██ ██║██╔══██║██╔══██╗╚██╗ ██╔╝██║╚════██║ ║
║ ╚█████╔╝██║ ██║██║ ██║ ╚████╔╝ ██║███████║ ║
║ ╚════╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═══╝ ╚═╝╚══════╝ ║
║ ║
║ Just A Rather Very Intelligent System ║
║ ║
╚══════════════════════════════════════════════════════════╝
"""

    print(title)
72
+
73
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: build every service at startup, persist sessions at shutdown.

    BUGFIX: the shutdown code previously lived inside the same try-block as
    startup, so an exception raised while *serving* (propagating through
    ``yield``) was mislogged as "Fatal error during startup" and the session
    save / TTS pool shutdown were skipped. Startup errors are now handled
    separately, and shutdown runs in a ``finally`` so sessions are saved even
    on an unclean exit.
    """
    global vector_store_service, groq_service, realtime_service, brain_service, chat_service

    print_title()
    logger.info("=" * 60)
    logger.info("J.A.R.V.I.S - Starting Up...")
    logger.info("-" * 60)
    logger.info("[CONFIG] Assistant name: %s", ASSISTANT_NAME)
    logger.info("[CONFIG] Groq model: %s", GROQ_MODEL)
    logger.info("[CONFIG] Groq API keys loaded: %d", len(GROQ_API_KEYS))
    logger.info("[CONFIG] Tavily API key: %s", "configured" if TAVILY_API_KEY else "NOT SET")
    logger.info("[CONFIG] Embedding model: %s", EMBEDDING_MODEL)
    logger.info("[CONFIG] Chunk size: %d | Overlap: %d | Max history turns: %d",
                CHUNK_SIZE, CHUNK_OVERLAP, MAX_CHAT_HISTORY_TURNS)

    try:
        logger.info("Initializing vector store service...")
        t0 = time.perf_counter()
        vector_store_service = VectorStoreService()
        vector_store_service.create_vector_store()
        logger.info("[TIMING] startup_vector_store: %.3fs", time.perf_counter() - t0)

        logger.info("Initializing Groq service (general queries)...")
        groq_service = GroqService(vector_store_service)
        logger.info("Groq service initialized successfully")

        logger.info("Initializing Realtime Groq service (with Tavily search)...")
        realtime_service = RealtimeGroqService(vector_store_service)
        logger.info("Realtime Groq service initialized successfully")

        logger.info("Initializing Brain service (Groq query classification)...")
        brain_service = BrainService()
        logger.info("Brain service initialized successfully")

        logger.info("Initializing chat service...")
        chat_service = ChatService(groq_service, realtime_service, brain_service)
        logger.info("Chat service initialized successfully")

        logger.info("=" * 60)
        logger.info("Service Status:")
        logger.info(" - Vector Store: Ready")
        logger.info(" - Groq AI (General): Ready")
        logger.info(" - Groq AI (Realtime): Ready")
        logger.info(" - Brain (Groq): Ready")
        logger.info(" - Chat Service: Ready")
        logger.info("=" * 60)
        logger.info("J.A.R.V.I.S is online and ready!")
        logger.info("API: http://localhost:8000")
        logger.info("Frontend: http://localhost:8000/")
        logger.info("-" * 60)
    except Exception as e:
        # Startup failed: surface the error and refuse to launch.
        logger.error(f"Fatal error during startup: {e}", exc_info=True)
        raise

    try:
        yield
    finally:
        # Always runs on shutdown, clean or not: stop TTS workers, flush sessions.
        logger.info("\nShutting down J.A.R.V.I.S...")
        _tts_pool.shutdown(wait=True)
        if chat_service:
            for session_id in list(chat_service.sessions.keys()):
                chat_service.save_chat_session(session_id)
        logger.info("All sessions saved. Goodbye!")
137
+
138
# FastAPI application. The interactive docs endpoints are disabled on purpose
# (docs_url/redoc_url/openapi_url=None) so the API surface is not advertised.
app = FastAPI(
    title="J.A.R.V.I.S API",
    description="Just A Rather Very Intelligent System",
    lifespan=lifespan,
    docs_url=None,
    redoc_url=None,
    openapi_url=None
)

# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# maximally permissive; browsers reject credentialed wildcard-origin responses,
# so pin concrete origins if cookie/auth credentials are ever used.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
154
+
155
class TimingMiddleware(BaseHTTPMiddleware):
    """Log method, path, status code and wall-clock duration for every request."""

    async def dispatch(self, request: Request, call_next):
        started = time.perf_counter()
        response = await call_next(request)
        duration = time.perf_counter() - started
        logger.info(
            "[REQUEST] %s %s -> %s (%.3fs)",
            request.method, request.url.path, response.status_code, duration,
        )
        return response

app.add_middleware(TimingMiddleware)
165
+
166
@app.get("/api")
async def api_info():
    """Describe the API: service name plus a map of endpoint -> one-line summary."""
    endpoints = {
        "/chat": "General chat (non-streaming)",
        "/chat/stream": "General chat (streaming chunks)",
        "/chat/realtime": "Realtime chat (non-streaming)",
        "/chat/realtime/stream": "Realtime chat (streaming chunks)",
        "/chat/jarvis/stream": "Jarvis unified route (brain classifies, streams)",
        "/chat/history/{session_id}": "Get chat history",
        "/health": "System health check",
        "/tts": "Text-to-speech (POST text, returns streamed MP3)",
    }
    return {"message": "J.A.R.V.I.S API", "endpoints": endpoints}
181
+
182
@app.get("/health")
async def health():
    """Liveness probe: report which service singletons lifespan() has built."""
    try:
        return {
            "status": "healthy",
            # Each flag is True once the corresponding global is initialized.
            "vector_store": vector_store_service is not None,
            "groq_service": groq_service is not None,
            "realtime_service": realtime_service is not None,
            "brain_service": brain_service is not None,
            "chat_service": chat_service is not None
        }
    except Exception as e:
        logger.warning("[API /health] Error: %s", e)
        return {"status": "degraded", "error": str(e)}
196
+
197
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Non-streaming general chat: process one message and return the full reply.

    Error mapping: 503 service missing / all Groq keys failed, 400 invalid
    session id, 429 provider quota, 500 anything else.
    """
    if not chat_service:
        raise HTTPException(status_code=503, detail="Chat service not initialized")

    logger.info("[API /chat] Incoming | session_id=%s | message_len=%d | message=%.100s",
                request.session_id or "new", len(request.message), request.message)

    try:
        session_id = chat_service.get_or_create_session(request.session_id)
        response_text = chat_service.process_message(session_id, request.message)
        # Persist the turn immediately so history survives a crash/restart.
        chat_service.save_chat_session(session_id)
        logger.info("[API /chat] Done | session_id=%s | response_len=%d", session_id[:12], len(response_text))
        return ChatResponse(response=response_text, session_id=session_id)
    except ValueError as e:
        # Raised for a malformed session id.
        logger.warning("[API /chat] Invalid session_id: %s", e)
        raise HTTPException(status_code=400, detail=str(e))
    except AllGroqApisFailedError as e:
        logger.error("[API /chat] All Groq APIs failed: %s", e)
        raise HTTPException(status_code=503, detail=str(e))
    except Exception as e:
        # Map provider quota errors to a friendly 429; everything else is a 500.
        if _is_rate_limit_error(e):
            logger.warning("[API /chat] Rate limit hit: %s", e)
            raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
        logger.error("[API /chat] Error: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
223
+
224
+ _SPLIT_RE = re.compile(r"(?<=[\.!?,;:])\s+")
225
+ _MIN_WORDS_FIRST = 2
226
+ _MIN_WORDS = 3
227
+ _MERGE_IF_WORDS = 2
228
+
229
+ def _split_sentences(buf: str):
230
+ parts = _SPLIT_RE.split(buf)
231
+ if len(parts) <= 1:
232
+ return [], buf
233
+ raw = [p.strip() for p in parts[:-1] if p.strip()]
234
+ sentences, pending = [], ""
235
+ for s in raw:
236
+ if pending:
237
+ s = (pending + " " + s).strip()
238
+ pending = ""
239
+ min_req = _MIN_WORDS_FIRST if not sentences else _MIN_WORDS
240
+ if len(s.split()) < min_req:
241
+ pending = s
242
+ continue
243
+ sentences.append(s)
244
+ remaining = (pending + " " + parts[-1].strip()).strip() if pending else parts[-1].strip()
245
+ return sentences, remaining
246
+
247
+ def _merge_short(sentences):
248
+ if not sentences:
249
+ return []
250
+ merged, i = [], 0
251
+ while i < len(sentences):
252
+ cur = sentences[i]
253
+ j = i + 1
254
+ while j < len(sentences) and len(sentences[j].split()) <= _MERGE_IF_WORDS:
255
+ cur = (cur + " " + sentences[j]).strip()
256
+ j += 1
257
+ merged.append(cur)
258
+ i = j
259
+ return merged
260
+
261
def _generate_tts_sync(text: str, voice: str, rate: str) -> bytes:
    """Blocking edge-tts synthesis: return the complete MP3 payload for *text*.

    Spins up its own event loop via asyncio.run, so it must be called from a
    worker thread (see _tts_pool below), never from the server's running loop.
    """
    async def _collect() -> bytes:
        audio = bytearray()
        stream = edge_tts.Communicate(text=text, voice=voice, rate=rate).stream()
        async for piece in stream:
            if piece["type"] == "audio":
                audio.extend(piece["data"])
        return bytes(audio)

    return asyncio.run(_collect())

# Worker threads running _generate_tts_sync while the SSE generator keeps streaming.
_tts_pool = ThreadPoolExecutor(max_workers=4)
272
+
273
def _stream_generator(session_id: str, chunk_iter, is_realtime: bool, tts_enabled: bool = False):
    """Turn an LLM chunk iterator into Server-Sent Events, optionally with inline TTS.

    Emits SSE "data:" events: an opening session event, text chunks, optional
    'activity' / 'search_results' passthrough events, base64 MP3 'audio' events
    (when tts_enabled), and a final done event. TTS is pipelined: sentences are
    submitted to _tts_pool as they complete and ready audio is drained between
    text chunks so playback can start before generation finishes.
    """
    # Opening event so the client learns its session id immediately.
    yield f"data: {json.dumps({'session_id': session_id, 'chunk': '', 'done': False})}\n\n"

    buffer = ""          # unterminated text accumulated across chunks
    held = None          # last complete sentence, held back so a short follow-up can merge into it
    is_first = True      # first sentence uses the lower _MIN_WORDS_FIRST threshold
    audio_queue = []     # FIFO of (Future[bytes], sentence) pairs, in submit order

    def _submit(text):
        # Queue one sentence for background TTS; ignore blank input.
        if not text or not text.strip():
            return
        audio_queue.append((_tts_pool.submit(_generate_tts_sync, text, TTS_VOICE, TTS_RATE), text))

    def _drain_ready():
        # Pop finished futures from the FRONT only, preserving sentence order.
        events = []
        while audio_queue and audio_queue[0][0].done():
            fut, sent = audio_queue.pop(0)
            try:
                audio = fut.result()
                b64 = base64.b64encode(audio).decode("ascii")
                events.append(f"data: {json.dumps({'audio': b64, 'sentence': sent})}\n\n")
            except Exception as exc:
                # Best-effort: a failed clip is logged and skipped, text still streams.
                logger.warning("[TTS-INLINE] Failed for '%s': %s", sent[:40], exc)
        return events

    try:
        for chunk in chunk_iter:
            # Dict sentinels from the service layer are forwarded as their own events.
            if isinstance(chunk, dict) and "_activity" in chunk:
                yield f"data: {json.dumps({'activity': chunk['_activity']})}\n\n"
                continue
            if isinstance(chunk, dict) and "_search_results" in chunk:
                yield f"data: {json.dumps({'search_results': chunk['_search_results']})}\n\n"
                continue
            if not chunk:
                continue

            yield f"data: {json.dumps({'chunk': chunk, 'done': False})}\n\n"

            if not tts_enabled:
                continue

            for ev in _drain_ready():
                yield ev

            buffer += chunk
            sentences, buffer = _split_sentences(buffer)
            sentences = _merge_short(sentences)

            # A short new first sentence is absorbed into the held-back one.
            if held and sentences and len(sentences[0].split()) <= _MERGE_IF_WORDS:
                held = (held + " " + sentences[0]).strip()
                sentences = sentences[1:]

            for i, sent in enumerate(sentences):
                min_w = _MIN_WORDS_FIRST if is_first else _MIN_WORDS
                if len(sent.split()) < min_w:
                    continue
                is_last = (i == len(sentences) - 1)
                if held:
                    _submit(held)
                    held = None
                    is_first = False
                if is_last:
                    # Hold the newest sentence back in case the next chunk extends it.
                    held = sent
                else:
                    _submit(sent)
                    is_first = False

    except Exception as e:
        # Generation failed mid-stream: cancel pending audio and signal the client.
        for fut, _ in audio_queue:
            fut.cancel()
        yield f"data: {json.dumps({'chunk': '', 'done': True, 'error': str(e)})}\n\n"
        return

    if tts_enabled:
        # Flush whatever is left: held sentence and/or unterminated buffer tail.
        remaining = buffer.strip()
        if held:
            if remaining and len(remaining.split()) <= _MERGE_IF_WORDS:
                _submit((held + " " + remaining).strip())
            else:
                _submit(held)
                if remaining:
                    _submit(remaining)
        elif remaining:
            _submit(remaining)

        # Block (bounded) on the remaining futures, still in sentence order.
        for fut, sent in audio_queue:
            try:
                audio = fut.result(timeout=15)
                b64 = base64.b64encode(audio).decode("ascii")
                yield f"data: {json.dumps({'audio': b64, 'sentence': sent})}\n\n"
            except FuturesTimeoutError:
                logger.warning("[TTS-INLINE] Timeout for '%s' (15s)", (sent or "")[:40])
            except Exception as exc:
                logger.warning("[TTS-INLINE] Failed for '%s': %s", (sent or "")[:40], exc)

    yield f"data: {json.dumps({'chunk': '', 'done': True, 'session_id': session_id})}\n\n"
369
+
370
@app.post("/chat/stream")
async def chat_stream(request: ChatRequest):
    """General chat as Server-Sent Events; emits inline TTS audio when request.tts is set."""
    if not chat_service:
        raise HTTPException(status_code=503, detail="Chat service not initialized")
    logger.info("[API /chat/stream] Incoming | session_id=%s | message_len=%d | message=%.100s",
                request.session_id or "new", len(request.message), request.message)

    try:
        session_id = chat_service.get_or_create_session(request.session_id)
        chunk_iter = chat_service.process_message_stream(session_id, request.message)
        # X-Accel-Buffering: no — stops nginx-style reverse proxies from buffering SSE.
        return StreamingResponse(
            _stream_generator(session_id, chunk_iter, is_realtime=False, tts_enabled=request.tts),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except AllGroqApisFailedError as e:
        raise HTTPException(status_code=503, detail=str(e))
    except Exception as e:
        if _is_rate_limit_error(e):
            raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
        logger.error("[API /chat/stream] Error: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
394
+
395
@app.post("/chat/realtime", response_model=ChatResponse)
async def chat_realtime(request: ChatRequest):
    """Non-streaming realtime chat (web-search-augmented path)."""
    if not chat_service:
        raise HTTPException(status_code=503, detail="Chat service not initialized")
    if not realtime_service:
        raise HTTPException(status_code=503, detail="Realtime service not initialized")

    logger.info("[API /chat/realtime] Incoming | session_id=%s | message_len=%d | message=%.100s",
                request.session_id or "new", len(request.message), request.message)
    try:
        session_id = chat_service.get_or_create_session(request.session_id)
        response_text = chat_service.process_realtime_message(session_id, request.message)
        # Persist the turn immediately so history survives a restart.
        chat_service.save_chat_session(session_id)
        logger.info("[API /chat/realtime] Done | session_id=%s | response_len=%d", session_id[:12], len(response_text))
        return ChatResponse(response=response_text, session_id=session_id)
    except ValueError as e:
        logger.warning("[API /chat/realtime] Invalid session_id: %s", e)
        raise HTTPException(status_code=400, detail=str(e))
    except AllGroqApisFailedError as e:
        logger.error("[API /chat/realtime] All Groq APIs failed: %s", e)
        raise HTTPException(status_code=503, detail=str(e))
    except Exception as e:
        # Quota errors map to 429 with a friendly message; everything else is 500.
        if _is_rate_limit_error(e):
            logger.warning("[API /chat/realtime] Rate limit hit: %s", e)
            raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
        logger.error("[API /chat/realtime] Error: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error processing chat: {str(e)}")
422
+
423
@app.post("/chat/realtime/stream")
async def chat_realtime_stream(request: ChatRequest):
    """Realtime (search-augmented) chat as Server-Sent Events; optional inline TTS."""
    if not chat_service or not realtime_service:
        raise HTTPException(status_code=503, detail="Service not initialized")
    logger.info("[API /chat/realtime/stream] Incoming | session_id=%s | message_len=%d | message=%.100s",
                request.session_id or "new", len(request.message), request.message)
    try:
        session_id = chat_service.get_or_create_session(request.session_id)
        chunk_iter = chat_service.process_realtime_message_stream(session_id, request.message)
        return StreamingResponse(
            _stream_generator(session_id, chunk_iter, is_realtime=True, tts_enabled=request.tts),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except AllGroqApisFailedError as e:
        raise HTTPException(status_code=503, detail=str(e))
    except Exception as e:
        if _is_rate_limit_error(e):
            raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
        logger.error("[API /chat/realtime/stream] Error: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
446
+
447
@app.post("/chat/jarvis/stream")
async def chat_jarvis_stream(request: ChatRequest):
    """Unified route: the brain service picks general vs realtime, then streams SSE."""
    if not chat_service:
        raise HTTPException(status_code=503, detail="Service not initialized")
    logger.info("[API /chat/jarvis/stream] Incoming | session_id=%s | message_len=%d | message=%.100s",
                request.session_id or "new", len(request.message), request.message)
    try:
        session_id = chat_service.get_or_create_session(request.session_id)
        chunk_iter = chat_service.process_jarvis_message_stream(session_id, request.message)
        return StreamingResponse(
            _stream_generator(session_id, chunk_iter, is_realtime=True, tts_enabled=request.tts),
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except AllGroqApisFailedError as e:
        raise HTTPException(status_code=503, detail=str(e))
    except Exception as e:
        if _is_rate_limit_error(e):
            raise HTTPException(status_code=429, detail=RATE_LIMIT_MESSAGE)
        logger.error("[API /chat/jarvis/stream] Error: %s", e, exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))
470
+
471
@app.get("/chat/history/{session_id}")
async def get_chat_history(session_id: str):
    """Return the stored conversation for *session_id* as plain role/content dicts."""
    if not chat_service:
        raise HTTPException(status_code=503, detail="Chat service not initialized")
    # Reject malformed ids before touching storage.
    if not chat_service.validate_session_id(session_id):
        raise HTTPException(status_code=400, detail="Invalid session_id format")

    try:
        messages = chat_service.get_chat_history(session_id)
        return {
            "session_id": session_id,
            "messages": [{"role": msg.role, "content": msg.content} for msg in messages]
        }
    except Exception as e:
        logger.error(f"Error retrieving history: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error retrieving history: {str(e)}")
487
+
488
@app.post("/tts")
async def text_to_speech(request: TTSRequest):
    """Stream MP3 audio for *request.text* via edge-tts.

    Returns a chunked ``audio/mpeg`` StreamingResponse; raises 400 for
    empty/whitespace text. Synthesis errors are logged and end the stream
    early (headers are already sent) rather than failing the whole response.
    """
    text = request.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text is required")

    async def generate():
        try:
            communicate = edge_tts.Communicate(text=text, voice=TTS_VOICE, rate=TTS_RATE)
            async for chunk in communicate.stream():
                if chunk["type"] == "audio":
                    yield chunk["data"]
        except Exception as e:
            # BUGFIX: was logger.error(f"...: %s", e) — a spurious f-prefix on a
            # %-style format string; use lazy %-formatting like the rest of the file.
            logger.error("[TTS] Error generating speech: %s", e)

    return StreamingResponse(
        generate(),
        media_type="audio/mpeg",
        headers={"Cache-Control": "no-cache"},
    )
508
+
509
# Serve the bundled frontend (if present) at "/". Mounted after the routes above
# so API paths take precedence over static files.
_frontend_dir = Path(__file__).resolve().parent.parent / "frontend"
if _frontend_dir.exists():
    app.mount("/", StaticFiles(directory=str(_frontend_dir), html=True), name="frontend")
512
+
513
def run():
    """Start the development server (uvicorn, auto-reload) on 0.0.0.0:8000."""
    # String app path ("app.main:app") is required for reload=True to work.
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
        log_level="info"
    )

if __name__ == "__main__":
    run()
app/models.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional
3
+
4
class ChatMessage(BaseModel):
    """One stored turn of a conversation."""

    # Speaker tag (presumably "user"/"assistant" — confirm against ChatService).
    role: str
    # Raw message text.
    content: str
7
+
8
class ChatRequest(BaseModel):
    """Request body shared by all /chat* endpoints."""

    # User message; length-bounded at the API edge.
    message: str = Field(..., min_length=1, max_length=32_000)
    # Existing session to continue; None lets the server create a new one.
    session_id: Optional[str] = None
    # When True, streaming endpoints also emit inline base64 MP3 audio events.
    tts: bool = False
12
+
13
class ChatResponse(BaseModel):
    """Non-streaming chat reply: full text plus the session it belongs to."""

    response: str
    session_id: str
16
+
17
class ChatHistory(BaseModel):
    """A session's full ordered message list (used for on-disk persistence)."""

    session_id: str
    messages: List[ChatMessage]
20
+
21
class TTSRequest(BaseModel):
    """Request body for /tts."""

    # Text to synthesize; bounded to keep a single clip reasonably sized.
    text: str = Field(..., min_length=1, max_length=5000)
app/services/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SERVICES PACKAGE
3
+ ================
4
+
5
+ Business logic lives here. The API layer (app.main) calls these services;
6
+ they do not handle HTTP, only chat flow, LLM calls, and data.
7
+
8
+ MODULES:
9
+ brain_service - Groq-based classifier that routes each query to general or realtime handling.
+ chat_service - Sessions (get/create, load from disk), message list, format history for LLM, save to disk.
10
+ groq_service - General chat: retrieve context from vector store, build prompt, call Groq LLM.
11
+ realtime_service - Realtime chat: Tavily search first, then same as groq (inherits GroqService).
12
+ vector_store - Load learning_data + chats_data, chunk, embed, FAISS index; provide retriever for context.
13
+ """
app/services/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (759 Bytes). View file
 
app/services/__pycache__/brain_service.cpython-312.pyc ADDED
Binary file (6.71 kB). View file
 
app/services/__pycache__/chat_service.cpython-312.pyc ADDED
Binary file (21.1 kB). View file
 
app/services/__pycache__/groq_service.cpython-312.pyc ADDED
Binary file (13.4 kB). View file
 
app/services/__pycache__/realtime_service.cpython-312.pyc ADDED
Binary file (14.4 kB). View file
 
app/services/__pycache__/vector_store.cpython-312.pyc ADDED
Binary file (7.32 kB). View file
 
app/services/brain_service.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import re
3
+ import time
4
+ from typing import List, Optional, Tuple, Literal
5
+
6
+ from config import GROQ_API_KEYS, GROQ_BRAIN_MODEL
7
+
8
+ logger = logging.getLogger("J.A.R.V.I.S")
9
+
10
+ QueryType = Literal["general", "realtime"]
11
+ MAX_CONTEXT_TURNS = 6
12
+ MAX_MESSAGE_PREVIEW = 500
13
+ REASONING_GENERAL = "Answerable from knowledge and context"
14
+ REASONING_REALTIME = "Needs live web search"
15
+ REASONING_DEFAULT = "Brain unavailable; defaulting to realtime"
16
+ REASONING_UNCLEAR = "Unclear; defaulting to realtime"
17
+
18
+ _BRAIN_SYSTEM_PROMPT = """You are a query classifier for an AI assistant. Your ONLY job is to decide whether a user's message needs LIVE WEB SEARCH or not.
19
+
20
+ Output EXACTLY one word: either "general" or "realtime".
21
+
22
+ - general: ONLY questions that are purely from static knowledge, learning data, or conversation. Examples: "Tell me a joke", "What did I ask you before?", "Open YouTube", "Write a poem about cats", "How do I improve my coding?", "What is the capital of France?", casual chit-chat. NO questions about people, current events, or things that could change.
23
+
24
+ - realtime: ALWAYS use realtime for:
25
+ * ANY question about a person (famous or not): "Who is Elon Musk?", "Tell me about [person]", "What is [name] known for?", "Who is that actor?" — the LLM has no real-time data; web search finds current info and may find info on lesser-known people.
26
+ * Anything that could have changed: news, weather, stock prices, sports scores, elections, "latest", "current", "today", "recent", "now".
27
+ * Factual lookups where real-time data would be better: events, companies, products, releases, versions.
28
+
29
+ STRONG RULE: If the question is about a person (who, what, tell me about, etc.) → ALWAYS "realtime". The LLM cannot know current facts; web search can.
30
+
31
+ When in doubt, prefer "realtime" — it's better to search when not needed than to miss current information.
32
+
33
+ Output ONLY the word. No explanation, no punctuation, no other text."""
34
+
35
+ class BrainService:
36
+ def __init__(self):
37
+ self._llms = []
38
+ if GROQ_API_KEYS:
39
+ try:
40
+ from langchain_groq import ChatGroq
41
+ self._llms = [
42
+ ChatGroq(
43
+ groq_api_key=key,
44
+ model_name=GROQ_BRAIN_MODEL,
45
+ temperature=0.0,
46
+ max_tokens=20,
47
+ request_timeout=10,
48
+ )
49
+ for key in GROQ_API_KEYS
50
+ ]
51
+ logger.info("[BRAIN] Groq brain initialized (model: %s) with %d key(s)", GROQ_BRAIN_MODEL, len(self._llms))
52
+ except Exception as e:
53
+ logger.warning("[BRAIN] Failed to create Groq brain: %s", e)
54
+ if not self._llms:
55
+ logger.warning("[BRAIN] No API keys. Classification will default to realtime.")
56
+
57
+ def classify(
58
+ self,
59
+ user_message: str,
60
+ chat_history: Optional[List[Tuple[str, str]]] = None,
61
+ key_index: int = 0,
62
+ ) -> Tuple[QueryType, str, int]:
63
+ if not self._llms:
64
+ return ("realtime", REASONING_DEFAULT, 0)
65
+
66
+ context_lines = []
67
+ if chat_history:
68
+ for u, a in chat_history[-MAX_CONTEXT_TURNS:]:
69
+ u_preview = (u or "")[:MAX_MESSAGE_PREVIEW] + ("..." if len(u or "") > MAX_MESSAGE_PREVIEW else "")
70
+ a_preview = (a or "")[:MAX_MESSAGE_PREVIEW] + ("..." if len(a or "") > MAX_MESSAGE_PREVIEW else "")
71
+ context_lines.append(f"User: {u_preview}")
72
+ context_lines.append(f"Assistant: {a_preview}")
73
+ context_block = "\n".join(context_lines) if context_lines else "(No prior conversation)"
74
+ msg_preview = (user_message or "")[:MAX_MESSAGE_PREVIEW]
75
+ user_content = f"""Conversation so far:
76
+ {context_block}
77
+
78
+ Current user message: {msg_preview}
79
+
80
+ Classify the current message. Output ONLY: general or realtime"""
81
+
82
+ t0 = time.perf_counter()
83
+ try:
84
+ from langchain_core.messages import SystemMessage, HumanMessage
85
+ idx = key_index % len(self._llms)
86
+ llm = self._llms[idx]
87
+ response = llm.invoke([
88
+ SystemMessage(content=_BRAIN_SYSTEM_PROMPT),
89
+ HumanMessage(content=user_content),
90
+ ])
91
+ text = (response.content or "").strip().lower()
92
+ except Exception as e:
93
+ elapsed_ms = int((time.perf_counter() - t0) * 1000)
94
+ logger.warning("[BRAIN] Groq error after %d ms: %s. Defaulting to realtime.", elapsed_ms, e)
95
+ return ("realtime", f"API error: {str(e)[:60]}", elapsed_ms)
96
+
97
+ elapsed_ms = int((time.perf_counter() - t0) * 1000)
98
+ if re.search(r"\brealtime\b", text):
99
+ logger.info("[BRAIN] Groq (key #%d) returned realtime in %d ms", key_index + 1, elapsed_ms)
100
+ return ("realtime", REASONING_REALTIME, elapsed_ms)
101
+ if re.search(r"\bgeneral\b", text):
102
+ logger.info("[BRAIN] Groq (key #%d) returned general in %d ms", key_index + 1, elapsed_ms)
103
+ return ("general", REASONING_GENERAL, elapsed_ms)
104
+ logger.warning("[BRAIN] Unexpected output: %r in %d ms. Defaulting to realtime.", text[:100], elapsed_ms)
105
+ return ("realtime", REASONING_UNCLEAR, elapsed_ms)
app/services/chat_service.py ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import time
4
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
5
+ from pathlib import Path
6
+ from typing import List, Optional, Dict, Iterator, Any, Union
7
+ import uuid
8
+ import threading
9
+
10
+ from config import CHATS_DATA_DIR, MAX_CHAT_HISTORY_TURNS, GROQ_API_KEYS
11
+ from app.models import ChatMessage, ChatHistory
12
+ from app.services.groq_service import GroqService
13
+ from app.services.realtime_service import RealtimeGroqService
14
+ from app.services.brain_service import BrainService
15
+ from app.utils.key_rotation import get_next_key_pair
16
+
17
+ logger = logging.getLogger("J.A.R.V.I.S")
18
+
19
# Max seconds to wait on the brain-classification / web-search futures in
# process_jarvis_message_stream before defaulting to the realtime route.
JARVIS_BRAIN_SEARCH_TIMEOUT = 15
# During streaming, persist the session JSON every N content chunks.
SAVE_EVERY_N_CHUNKS = 5
21
+
22
class ChatService:
    """Session store and message router for J.A.R.V.I.S chat.

    Holds per-session message history in memory (``self.sessions``), mirrors it
    to JSON files under ``CHATS_DATA_DIR``, and dispatches user messages to the
    general (GroqService), realtime (RealtimeGroqService), or auto-routed
    "jarvis" (BrainService classifier + speculative web search) pipelines, in
    both blocking and streaming variants.
    """

    def __init__(
        self,
        groq_service: GroqService,
        realtime_service: RealtimeGroqService = None,
        brain_service: BrainService = None,
    ):
        """Store service dependencies and initialize empty session state.

        Args:
            groq_service: General-chat LLM service (required).
            realtime_service: Optional web-search-augmented service; realtime
                methods raise ValueError when it is None.
            brain_service: Optional general/realtime classifier used by
                process_jarvis_message_stream.
        """
        self.groq_service = groq_service
        self.realtime_service = realtime_service
        self.brain_service = brain_service
        # session_id -> ordered ChatMessage list (roles "user"/"assistant").
        self.sessions: Dict[str, List[ChatMessage]] = {}
        # Serializes JSON writes in save_chat_session (streams save mid-flight).
        self._save_lock = threading.Lock()

    def load_session_from_disk(self, session_id: str) -> bool:
        """Load a previously saved session JSON file into memory.

        Returns:
            True when the file existed and parsed; False when missing or on
            any load error (logged, never raised). Malformed message entries
            are skipped or coerced instead of failing the whole load.
        """
        # Filename sanitization must stay in sync with save_chat_session.
        safe_session_id = session_id.replace("-", "").replace(" ", "_")
        filename = f"chat_{safe_session_id}.json"
        filepath = CHATS_DATA_DIR / filename

        if not filepath.exists():
            return False
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                chat_dict = json.load(f)
            messages = []
            for msg in chat_dict.get("messages", []):
                if not isinstance(msg, dict):
                    continue
                # Coerce unknown roles to "user" and non-string content to str.
                role = msg.get("role")
                role = role if role in ("user", "assistant") else "user"
                content = msg.get("content")
                content = content if isinstance(content, str) else str(content or "")
                messages.append(ChatMessage(role=role, content=content))
            self.sessions[session_id] = messages
            return True
        except Exception as e:
            logger.warning("Failed to load session %s from disk: %s", session_id, e)
            return False

    def validate_session_id(self, session_id: str) -> bool:
        """Return True when session_id is safe to embed in a filename.

        Rejects empty/whitespace-only ids, NUL bytes, path-traversal
        characters ("..", "/", "\\"), and ids longer than 255 characters.
        """
        if not session_id or not session_id.strip():
            return False
        if "\0" in session_id:
            return False
        if ".." in session_id or "/" in session_id or "\\" in session_id:
            return False
        if len(session_id) > 255:
            return False
        return True

    def get_or_create_session(self, session_id: Optional[str] = None) -> str:
        """Resolve a session id to an in-memory session, creating one if needed.

        Lookup order: generate a fresh UUID when no id is given; otherwise
        validate, then check memory, then disk, then create an empty session
        under the caller-supplied id.

        Raises:
            ValueError: if the supplied id fails validate_session_id.
        """
        t0 = time.perf_counter()
        if not session_id:
            new_session_id = str(uuid.uuid4())
            self.sessions[new_session_id] = []
            logger.info("[TIMING] session_get_or_create: %.3fs (new)", time.perf_counter() - t0)
            return new_session_id
        if not self.validate_session_id(session_id):
            raise ValueError(
                f"Invalid session_id format: {session_id}. Session ID must be non-empty, "
                "not contain path traversal characters, and be under 255 characters."
            )

        if session_id in self.sessions:
            logger.info("[TIMING] session_get_or_create: %.3fs (memory)", time.perf_counter() - t0)
            return session_id

        if self.load_session_from_disk(session_id):
            logger.info("[TIMING] session_get_or_create: %.3fs (disk)", time.perf_counter() - t0)
            return session_id

        self.sessions[session_id] = []
        logger.info("[TIMING] session_get_or_create: %.3fs (new_id)", time.perf_counter() - t0)
        return session_id

    def add_message(self, session_id: str, role: str, content: str):
        """Append a message to the session, creating the session if absent."""
        if session_id not in self.sessions:
            self.sessions[session_id] = []
        self.sessions[session_id].append(ChatMessage(role=role, content=content))

    def get_chat_history(self, session_id: str) -> List[ChatMessage]:
        """Return the session's message list, or [] for an unknown session."""
        return self.sessions.get(session_id, [])

    def format_history_for_llm(self, session_id: str, exclude_last: bool = False) -> List[tuple]:
        """Convert stored messages into (user, assistant) content pairs.

        Only adjacent user->assistant pairs are kept; stray messages are
        skipped. Pass exclude_last=True when the newest (still unanswered)
        message has already been appended. Output is capped to the most
        recent MAX_CHAT_HISTORY_TURNS pairs.
        """
        messages = self.get_chat_history(session_id)
        history = []
        messages_to_process = messages[:-1] if exclude_last and messages else messages

        i = 0
        while i < len(messages_to_process) - 1:
            user_msg = messages_to_process[i]
            ai_msg = messages_to_process[i + 1]
            if user_msg.role == "user" and ai_msg.role == "assistant":
                u_content = user_msg.content if isinstance(user_msg.content, str) else str(user_msg.content or "")
                a_content = ai_msg.content if isinstance(ai_msg.content, str) else str(ai_msg.content or "")
                history.append((u_content, a_content))
                i += 2
            else:
                # Not a clean user/assistant pair - advance one and resync.
                i += 1

        if len(history) > MAX_CHAT_HISTORY_TURNS:
            history = history[-MAX_CHAT_HISTORY_TURNS:]
        return history

    def process_message(self, session_id: str, user_message: str) -> str:
        """Blocking general-chat turn: record, query GroqService, record reply."""
        logger.info("[GENERAL] Session: %s | User: %.200s", session_id[:12], user_message)
        self.add_message(session_id, "user", user_message)
        # exclude_last: the message just added must not also appear in history.
        chat_history = self.format_history_for_llm(session_id, exclude_last=True)
        logger.info("[GENERAL] History pairs sent to LLM: %d", len(chat_history))
        _, chat_idx = get_next_key_pair(len(GROQ_API_KEYS), need_brain=False)
        response = self.groq_service.get_response(question=user_message, chat_history=chat_history, key_start_index=chat_idx)
        self.add_message(session_id, "assistant", response)
        logger.info("[GENERAL] Response length: %d chars | Preview: %.120s", len(response), response)
        return response

    def process_realtime_message(self, session_id: str, user_message: str) -> str:
        """Blocking realtime (web-search-augmented) turn.

        Raises:
            ValueError: if no realtime service was provided at construction.
        """
        if not self.realtime_service:
            raise ValueError("Realtime service is not initialized. Cannot process realtime queries.")
        logger.info("[REALTIME] Session: %s | User: %.200s", session_id[:12], user_message)
        self.add_message(session_id, "user", user_message)
        chat_history = self.format_history_for_llm(session_id, exclude_last=True)
        logger.info("[REALTIME] History pairs sent to LLM: %d", len(chat_history))
        _, chat_idx = get_next_key_pair(len(GROQ_API_KEYS), need_brain=False)
        response = self.realtime_service.get_response(question=user_message, chat_history=chat_history, key_start_index=chat_idx)
        self.add_message(session_id, "assistant", response)
        logger.info("[REALTIME] Response length: %d chars | Preview: %.120s", len(response), response)
        return response

    def process_message_stream(
        self, session_id: str, user_message: str
    ) -> Iterator[Union[str, Dict[str, Any]]]:
        """Streaming general-chat turn.

        Yields str content chunks interleaved with dict control events
        ("_activity"). The assistant message is appended empty up front and
        grown chunk-by-chunk; the session is persisted every
        SAVE_EVERY_N_CHUNKS chunks and once more on completion (finally).
        """
        logger.info("[GENERAL-STREAM] Session: %s | User: %.200s", session_id[:12], user_message)
        self.add_message(session_id, "user", user_message)
        self.add_message(session_id, "assistant", "")
        chat_history = self.format_history_for_llm(session_id, exclude_last=True)
        logger.info("[GENERAL-STREAM] History pairs sent to LLM: %d", len(chat_history))

        yield {"_activity": {"event": "query_detected", "message": user_message}}
        yield {"_activity": {"event": "routing", "route": "general"}}
        yield {"_activity": {"event": "streaming_started", "route": "general"}}

        _, chat_idx = get_next_key_pair(len(GROQ_API_KEYS), need_brain=False)
        chunk_count = 0
        t0 = time.perf_counter()
        try:
            for chunk in self.groq_service.stream_response(
                question=user_message, chat_history=chat_history, key_start_index=chat_idx
            ):
                # Dict chunks are control events from the service - pass through.
                if isinstance(chunk, dict):
                    yield chunk
                    continue
                if chunk_count == 0:
                    elapsed_ms = int((time.perf_counter() - t0) * 1000)
                    yield {"_activity": {"event": "first_chunk", "route": "general", "elapsed_ms": elapsed_ms}}
                self.sessions[session_id][-1].content += chunk
                chunk_count += 1
                if chunk_count % SAVE_EVERY_N_CHUNKS == 0:
                    self.save_chat_session(session_id, log_timing=False)
                yield chunk
        finally:
            # Persist whatever was accumulated, even on error/client disconnect.
            final_response = self.sessions[session_id][-1].content
            logger.info("[GENERAL-STREAM] Completed | Chunks: %d | Response length: %d chars", chunk_count, len(final_response))
            self.save_chat_session(session_id)

    def process_realtime_message_stream(
        self, session_id: str, user_message: str
    ) -> Iterator[Union[str, Dict[str, Any]]]:
        """Streaming realtime turn; same chunk protocol as process_message_stream.

        Raises:
            ValueError: if no realtime service was provided at construction.
        """
        if not self.realtime_service:
            raise ValueError("Realtime service is not initialized.")
        logger.info("[REALTIME-STREAM] Session: %s | User: %.200s", session_id[:12], user_message)
        self.add_message(session_id, "user", user_message)
        self.add_message(session_id, "assistant", "")
        chat_history = self.format_history_for_llm(session_id, exclude_last=True)
        logger.info("[REALTIME-STREAM] History pairs sent to LLM: %d", len(chat_history))

        yield {"_activity": {"event": "query_detected", "message": user_message}}
        yield {"_activity": {"event": "routing", "route": "realtime"}}
        yield {"_activity": {"event": "streaming_started", "route": "realtime"}}

        _, chat_idx = get_next_key_pair(len(GROQ_API_KEYS), need_brain=False)
        chunk_count = 0
        t0 = time.perf_counter()
        try:
            for chunk in self.realtime_service.stream_response(
                question=user_message, chat_history=chat_history, key_start_index=chat_idx
            ):
                # Dict chunks are control events from the service - pass through.
                if isinstance(chunk, dict):
                    yield chunk
                    continue
                if chunk_count == 0:
                    elapsed_ms = int((time.perf_counter() - t0) * 1000)
                    yield {"_activity": {"event": "first_chunk", "route": "realtime", "elapsed_ms": elapsed_ms}}
                self.sessions[session_id][-1].content += chunk
                chunk_count += 1
                if chunk_count % SAVE_EVERY_N_CHUNKS == 0:
                    self.save_chat_session(session_id, log_timing=False)
                yield chunk
        finally:
            # Persist whatever was accumulated, even on error/client disconnect.
            final_response = self.sessions[session_id][-1].content
            logger.info("[REALTIME-STREAM] Completed | Chunks: %d | Response length: %d chars", chunk_count, len(final_response))
            self.save_chat_session(session_id)

    def process_jarvis_message_stream(
        self, session_id: str, user_message: str
    ) -> Iterator[Union[str, Dict[str, Any]]]:
        """Auto-routed streaming turn: classify general vs. realtime, then stream.

        Runs the brain classification and the web-search prefetch concurrently;
        the search is speculative and its result is discarded when the message
        is classified "general". Defaults to "realtime" on brain timeout or
        when no brain service is configured. Yields the same str/dict chunk
        protocol as the other streaming methods, plus "decision" and
        "_search_results" events.
        """
        logger.info("[JARVIS-STREAM] Session: %s | User: %.200s", session_id[:12], user_message)
        self.add_message(session_id, "user", user_message)
        self.add_message(session_id, "assistant", "")
        chat_history = self.format_history_for_llm(session_id, exclude_last=True)

        yield {"_activity": {"event": "query_detected", "message": user_message}}

        # One key for the brain classifier, a different one for the main chat.
        brain_idx, chat_idx = get_next_key_pair(len(GROQ_API_KEYS), need_brain=True)

        query_type = "realtime"
        reasoning = "Defaulting to realtime"
        brain_elapsed_ms = 0
        formatted_results = ""
        search_payload = None

        def _run_brain():
            # Classify the message; fall back to realtime when no brain service.
            if self.brain_service and brain_idx is not None:
                qt, r, ms = self.brain_service.classify(user_message, chat_history, key_index=brain_idx)
                return (qt, r, ms)
            return ("realtime", "No brain service", 0)

        def _run_search():
            # Speculative web-search prefetch, overlapped with classification.
            if self.realtime_service:
                return self.realtime_service.prefetch_web_search(user_message, chat_history)
            return ("", None)

        with ThreadPoolExecutor(max_workers=2) as executor:
            future_brain = executor.submit(_run_brain)
            future_search = executor.submit(_run_search)
            try:
                query_type, reasoning, brain_elapsed_ms = future_brain.result(timeout=JARVIS_BRAIN_SEARCH_TIMEOUT)
            except FuturesTimeoutError:
                logger.warning("[JARVIS] Brain classification timed out after %ds, defaulting to realtime", JARVIS_BRAIN_SEARCH_TIMEOUT)
                query_type, reasoning, brain_elapsed_ms = "realtime", "Brain timeout, defaulting to realtime", 0

            if query_type == "general":
                # General route: discard the speculative search result.
                formatted_results, search_payload = "", None
            else:
                try:
                    formatted_results, search_payload = future_search.result(timeout=JARVIS_BRAIN_SEARCH_TIMEOUT)
                except FuturesTimeoutError:
                    logger.warning("[JARVIS] Web search prefetch timed out after %ds", JARVIS_BRAIN_SEARCH_TIMEOUT)
                    formatted_results, search_payload = "", None

        logger.info("[JARVIS] Brain: %s in %d ms - %s", query_type, brain_elapsed_ms, reasoning)

        yield {"_activity": {"event": "decision", "query_type": query_type, "reasoning": reasoning, "elapsed_ms": brain_elapsed_ms}}
        yield {"_activity": {"event": "routing", "route": query_type}}
        if query_type == "realtime" and search_payload:
            yield {"_search_results": search_payload}
        yield {"_activity": {"event": "streaming_started", "route": query_type}}

        chunk_count = 0
        t0 = time.perf_counter()
        try:
            if query_type == "general":
                stream = self.groq_service.stream_response(
                    question=user_message, chat_history=chat_history, key_start_index=chat_idx
                )
            else:
                if not self.realtime_service:
                    raise ValueError("Realtime service not initialized.")
                # Reuse the prefetched results instead of searching again.
                stream = self.realtime_service.stream_response_with_prefetched(
                    question=user_message,
                    chat_history=chat_history,
                    formatted_results=formatted_results,
                    payload=search_payload,
                    key_start_index=chat_idx,
                )

            for chunk in stream:
                # Dict chunks are control events from the service - pass through.
                if isinstance(chunk, dict):
                    yield chunk
                    continue
                if chunk_count == 0:
                    elapsed_ms = int((time.perf_counter() - t0) * 1000)
                    yield {"_activity": {"event": "first_chunk", "route": query_type, "elapsed_ms": elapsed_ms}}
                self.sessions[session_id][-1].content += chunk
                chunk_count += 1
                if chunk_count % SAVE_EVERY_N_CHUNKS == 0:
                    self.save_chat_session(session_id, log_timing=False)
                yield chunk
        finally:
            # Persist whatever was accumulated, even on error/client disconnect.
            final_response = self.sessions[session_id][-1].content
            logger.info("[JARVIS-STREAM] Completed | Route: %s | Chunks: %d | Response length: %d chars",
                        query_type, chunk_count, len(final_response))
            self.save_chat_session(session_id)

    def save_chat_session(self, session_id: str, log_timing: bool = True):
        """Write the session's messages to its JSON file under CHATS_DATA_DIR.

        OSError is retried up to 3 times with a short linear backoff; any
        other exception is logged and abandoned. Writes are serialized via
        self._save_lock because streaming handlers save concurrently.
        """
        if session_id not in self.sessions or not self.sessions[session_id]:
            return

        messages = self.sessions[session_id]
        # Must mirror load_session_from_disk's filename scheme.
        safe_session_id = session_id.replace("-", "").replace(" ", "_")
        filename = f"chat_{safe_session_id}.json"
        filepath = CHATS_DATA_DIR / filename

        chat_dict = {
            "session_id": session_id,
            "messages": [{"role": msg.role, "content": msg.content} for msg in messages]
        }

        max_retries = 3
        last_exc = None
        for attempt in range(max_retries):
            try:
                with self._save_lock:
                    t0 = time.perf_counter() if log_timing else 0
                    # NOTE(review): in-place write (no temp-file + rename); a
                    # crash mid-write can leave a truncated JSON file - confirm
                    # whether atomic replace is wanted here.
                    with open(filepath, "w", encoding="utf-8") as f:
                        json.dump(chat_dict, f, indent=2, ensure_ascii=False)
                    if log_timing:
                        logger.info("[TIMING] save_session_json: %.3fs", time.perf_counter() - t0)
                    return
            except OSError as e:
                last_exc = e
                if attempt < max_retries - 1:
                    time.sleep(0.1 * (attempt + 1))
            except Exception as e:
                logger.error("Failed to save chat session %s to disk: %s", session_id, e)
                return
        logger.error("Failed to save chat session %s after %d retries: %s", session_id, max_retries, last_exc)
app/services/groq_service.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Iterator
2
+ from langchain_groq import ChatGroq
3
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
4
+ from langchain_core.messages import HumanMessage, AIMessage
5
+ import logging
6
+ import time
7
+
8
+ from config import (
9
+ GROQ_API_KEYS,
10
+ GROQ_MODEL,
11
+ JARVIS_SYSTEM_PROMPT,
12
+ GENERAL_CHAT_ADDENDUM,
13
+ )
14
+ from app.services.vector_store import VectorStoreService
15
+ from app.utils.time_info import get_time_information
16
+ from app.utils.retry import with_retry
17
+
18
+ logger = logging.getLogger("J.A.R.V.I.S")
19
+
20
# Per-request timeout (seconds) passed to each ChatGroq client.
GROQ_REQUEST_TIMEOUT = 60
# User-facing text carried by AllGroqApisFailedError when every key fails.
ALL_APIS_FAILED_MESSAGE = (
    "I'm unable to process your request at the moment. All API services are "
    "temporarily unavailable. Please try again in a few minutes."
)
25
+
26
class AllGroqApisFailedError(Exception):
    """Raised after every configured Groq API key has been tried and all failed."""
28
+
29
def escape_curly_braces(text: str) -> str:
    """Double every literal brace so *text* is safe inside a prompt template.

    Falsy input (empty string, None) is returned unchanged.
    """
    if not text:
        return text
    # Single-pass char mapping: "{" -> "{{" and "}" -> "}}".
    return text.translate(str.maketrans({"{": "{{", "}": "}}"}))
33
+
34
+ def _is_rate_limit_error(exc: BaseException) -> bool:
35
+ msg = str(exc).lower()
36
+ return "429" in str(exc) or "rate limit" in msg or "tokens per day" in msg
37
+
38
def _log_timing(label: str, elapsed: float, extra: str = "") -> None:
    """Emit a standardized ``[TIMING] label: N.NNNs`` info log.

    When *extra* is non-empty it is appended in parentheses.
    """
    suffix = f" ({extra})" if extra else ""
    logger.info(f"[TIMING] {label}: {elapsed:.3f}s{suffix}")
43
+
44
+ def _mask_api_key(key: str) -> str:
45
+ if not key or len(key) <= 12:
46
+ return "***masked***"
47
+ return f"{key[:8]}...{key[-4:]}"
48
+
49
class GroqService:
    """General-chat LLM service backed by Groq with multi-key fallback.

    Holds one ChatGroq client per configured API key. Every call starts at
    ``key_start_index`` and falls through the remaining keys on failure
    (rate limit or otherwise). Prompts are augmented with vector-store
    context, the current date/time, and a per-mode addendum.
    """

    def __init__(self, vector_store_service: VectorStoreService):
        """Build one ChatGroq client per key in GROQ_API_KEYS.

        Args:
            vector_store_service: Retriever source for prompt context.

        Raises:
            ValueError: when no API keys are configured.
        """
        if not GROQ_API_KEYS:
            raise ValueError(
                "No Groq API keys configured. Set GROQ_API_KEY (and optionally GROQ_API_KEY_2, GROQ_API_KEY_3, ...) in .env"
            )
        self.llms = [
            ChatGroq(
                groq_api_key=key,
                model_name=GROQ_MODEL,
                temperature=0.5,
                request_timeout=GROQ_REQUEST_TIMEOUT,
            )
            for key in GROQ_API_KEYS
        ]
        self.vector_store_service = vector_store_service
        logger.info(f"Initialized GroqService with {len(GROQ_API_KEYS)} API key(s) (primary-first fallback)")

    def _invoke_llm(
        self,
        prompt: ChatPromptTemplate,
        messages: list,
        question: str,
        key_start_index: int = 0,
    ) -> str:
        """Invoke the prompt chain, rotating through API keys until one succeeds.

        Each key gets up to 2 retries via with_retry.

        Raises:
            AllGroqApisFailedError: when every tried key fails (chained to the
                last underlying exception).
        """
        n = len(self.llms)
        last_exc = None
        keys_tried = []

        for j in range(n):
            # Rotate from the caller-chosen starting key.
            i = (key_start_index + j) % n
            keys_tried.append(i)
            masked_key = _mask_api_key(GROQ_API_KEYS[i])
            logger.info(f"Trying API key #{i + 1}/{n}: {masked_key}")

            def _invoke_with_key():
                chain = prompt | self.llms[i]
                return chain.invoke({"history": messages, "question": question})

            try:
                response = with_retry(
                    _invoke_with_key,
                    max_retries=2,
                    initial_delay=0.5,
                )
                if i > 0:
                    logger.info(f"Fallback successful: API key #{i + 1}/{n} succeeded: {masked_key}")
                return response.content
            except Exception as e:
                last_exc = e
                if _is_rate_limit_error(e):
                    logger.warning(f"API key #{i + 1}/{n} rate limited: {masked_key}")
                else:
                    logger.warning(f"API key #{i + 1}/{n} failed: {masked_key} - {str(e)[:100]}")
                # NOTE(review): this compares the ROTATED index i, not the
                # iteration count j. When key_start_index > 0, i hits n-1 on an
                # early iteration and the loop breaks before trying all keys -
                # likely should be `if j < n - 1`. Confirm intended.
                if i < n - 1:
                    logger.info("Falling back to next API key...")
                    continue
                break

        masked_all = ", ".join([_mask_api_key(GROQ_API_KEYS[j]) for j in keys_tried])
        logger.error(f"All {n} API key(s) failed. Tried: {masked_all}")
        raise AllGroqApisFailedError(ALL_APIS_FAILED_MESSAGE) from last_exc

    def _stream_llm(
        self,
        prompt: ChatPromptTemplate,
        messages: list,
        question: str,
        key_start_index: int = 0,
    ) -> Iterator[str]:
        """Stream the prompt chain, rotating through API keys on failure.

        Yields non-empty string content chunks.

        NOTE(review): if a key fails MID-stream after some chunks were already
        yielded, the next key restarts the stream from scratch, so the caller
        may receive duplicated leading content - confirm acceptable.

        Raises:
            AllGroqApisFailedError: when every tried key fails.
        """
        n = len(self.llms)
        last_exc = None

        for j in range(n):
            i = (key_start_index + j) % n
            masked_key = _mask_api_key(GROQ_API_KEYS[i])
            logger.info(f"Streaming with API key #{i + 1}/{n}: {masked_key}")

            try:
                chain = prompt | self.llms[i]
                chunk_count = 0
                first_chunk_time = None
                stream_start = time.perf_counter()

                for chunk in chain.stream({"history": messages, "question": question}):
                    # Chunks may be message objects (with .content) or dicts.
                    content = ""
                    if hasattr(chunk, "content"):
                        content = chunk.content or ""
                    elif isinstance(chunk, dict) and "content" in chunk:
                        content = chunk.get("content", "") or ""

                    if isinstance(content, str) and content:
                        if first_chunk_time is None:
                            first_chunk_time = time.perf_counter() - stream_start
                            _log_timing("first_chunk", first_chunk_time)
                        chunk_count += 1
                        yield content

                total_stream = time.perf_counter() - stream_start
                _log_timing("groq_stream_total", total_stream, f"chunks: {chunk_count}")

                if i > 0 and chunk_count > 0:
                    logger.info(f"Fallback successful: API key #{i + 1}/{n} streamed: {masked_key}")
                # Stream completed (even with zero chunks) - done.
                return

            except Exception as e:
                last_exc = e
                if _is_rate_limit_error(e):
                    logger.warning(f"API key #{i + 1}/{n} rate limited: {masked_key}")
                else:
                    logger.warning(f"API key #{i + 1}/{n} failed: {masked_key} - {str(e)[:100]}")
                # NOTE(review): same rotated-index early-break as _invoke_llm
                # (`i < n - 1` vs. `j < n - 1`) - confirm intended.
                if i < n - 1:
                    logger.info("Falling back to next API key for stream...")
                    continue
                break

        logger.error(f"All {n} API key(s) failed during stream.")
        raise AllGroqApisFailedError(ALL_APIS_FAILED_MESSAGE) from last_exc

    def _build_prompt_and_messages(
        self,
        question: str,
        chat_history: Optional[List[tuple]] = None,
        extra_system_parts: Optional[List[str]] = None,
        mode_addendum: str = "",
    ) -> tuple:
        """Assemble the (prompt template, history messages) pair for a call.

        The system message is JARVIS_SYSTEM_PROMPT plus current time, optional
        vector-store context (retrieval failure degrades to empty context),
        any extra system parts (e.g. web-search results), and a mode addendum.

        Returns:
            (ChatPromptTemplate, list of Human/AI message objects).
        """
        context = ""
        context_sources = []
        t0 = time.perf_counter()
        try:
            retriever = self.vector_store_service.get_retriever(k=10)
            context_docs = retriever.invoke(question)
            if context_docs:
                context = "\n".join([doc.page_content for doc in context_docs])
                context_sources = [doc.metadata.get("source", "unknown") for doc in context_docs]
                logger.info("[CONTEXT] Retrieved %d chunks from sources: %s", len(context_docs), context_sources)
            else:
                logger.info("[CONTEXT] No relevant chunks found for query")
        except Exception as retrieval_err:
            # Retrieval is best-effort: a failed vector store must not block chat.
            logger.warning("Vector store retrieval failed, using empty context: %s", retrieval_err)
        finally:
            _log_timing("vector_db", time.perf_counter() - t0)

        time_info = get_time_information()
        system_message = JARVIS_SYSTEM_PROMPT
        system_message += f"\n\nCurrent time and date: {time_info}"  # Layer 2: time awareness

        if context:
            # Braces must be escaped so retrieved text can't break the template.
            system_message += f"\n\nRelevant context from your learning data and past conversations:\n{escape_curly_braces(context)}"

        if extra_system_parts:
            system_message += "\n\n" + "\n\n".join(extra_system_parts)

        if mode_addendum:
            system_message += f"\n\n{mode_addendum}"

        prompt = ChatPromptTemplate.from_messages([
            ("system", system_message),
            MessagesPlaceholder(variable_name="history"),
            ("human", "{question}"),
        ])

        messages = []
        if chat_history:
            for human_msg, ai_msg in chat_history:
                messages.append(HumanMessage(content=human_msg))
                messages.append(AIMessage(content=ai_msg))

        logger.info("[PROMPT] System message length: %d chars | History pairs: %d | Question: %.100s",
                    len(system_message), len(chat_history) if chat_history else 0, question)

        return prompt, messages

    def get_response(
        self,
        question: str,
        chat_history: Optional[List[tuple]] = None,
        key_start_index: int = 0,
    ) -> str:
        """Blocking general-chat completion.

        Raises:
            AllGroqApisFailedError: propagated unchanged when all keys fail.
            Exception: any other error, wrapped with a descriptive message.
        """
        try:
            prompt, messages = self._build_prompt_and_messages(
                question, chat_history, mode_addendum=GENERAL_CHAT_ADDENDUM,
            )
            t0 = time.perf_counter()
            result = self._invoke_llm(prompt, messages, question, key_start_index=key_start_index)
            _log_timing("groq_api", time.perf_counter() - t0)
            logger.info("[RESPONSE] General chat | Length: %d chars | Preview: %.120s", len(result), result)
            return result
        except AllGroqApisFailedError:
            raise
        except Exception as e:
            raise Exception(f"Error getting response from Groq: {str(e)}") from e

    def stream_response(
        self,
        question: str,
        chat_history: Optional[List[tuple]] = None,
        key_start_index: int = 0,
    ) -> Iterator[str]:
        """Streaming general-chat completion.

        NOTE(review): despite the Iterator[str] annotation, the first yield is
        a dict "_activity" control event; callers (ChatService streams) filter
        dicts explicitly - consider widening the annotation.

        Raises:
            AllGroqApisFailedError: propagated unchanged when all keys fail.
            Exception: any other error, wrapped with a descriptive message.
        """
        try:
            prompt, messages = self._build_prompt_and_messages(
                question, chat_history, mode_addendum=GENERAL_CHAT_ADDENDUM,
            )
            yield {"_activity": {"event": "context_retrieved", "message": "Retrieved relevant context from knowledge base"}}
            yield from self._stream_llm(prompt, messages, question, key_start_index=key_start_index)
        except AllGroqApisFailedError:
            raise
        except Exception as e:
            raise Exception(f"Error streaming response from Groq: {str(e)}") from e
app/services/realtime_service.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Iterator, Tuple, Any
2
+ from tavily import TavilyClient
3
+ import logging
4
+ import os
5
+ import time
6
+
7
+ from app.services.groq_service import GroqService, escape_curly_braces, AllGroqApisFailedError
8
+ from app.services.vector_store import VectorStoreService
9
+ from app.utils.retry import with_retry
10
+ from config import REALTIME_CHAT_ADDENDUM, GROQ_API_KEYS, GROQ_MODEL
11
+
12
+ logger = logging.getLogger("J.A.R.V.I.S")
13
+
14
# Shorter timeout (seconds) for the small "fast" LLM used for query extraction.
GROQ_REQUEST_TIMEOUT_FAST = 15

# Prompt for the fast LLM that condenses the user's message (plus recent
# history, to resolve pronouns/references) into a short web-search query.
_QUERY_EXTRACTION_PROMPT = (
    "You are a search query optimizer. Given the user's message and recent conversation, "
    "produce a single short, focused web search query (max 12 words) that will find the "
    "information the user needs. Resolve any references (like 'that website', 'him', 'it') "
    "using the conversation history. Output ONLY the search query, nothing else."
)
22
+
23
+ class RealtimeGroqService(GroqService):
24
    def __init__(self, vector_store_service: VectorStoreService):
        """Initialize the base GroqService, the Tavily client, and a fast LLM.

        The Tavily client is only created when TAVILY_API_KEY is set; without
        it, search methods degrade to empty results. The "fast" LLM (first API
        key, temperature 0, max 50 tokens, short timeout) is used only for
        search-query extraction.
        """
        super().__init__(vector_store_service)

        tavily_api_key = os.getenv("TAVILY_API_KEY", "")
        if tavily_api_key:
            self.tavily_client = TavilyClient(api_key=tavily_api_key)
            logger.info("Tavily search client initialized successfully")
        else:
            # Search methods check for None and return empty results.
            self.tavily_client = None
            logger.warning("TAVILY_API_KEY not set. Realtime search will be unavailable.")

        if GROQ_API_KEYS:
            from langchain_groq import ChatGroq
            self._fast_llm = ChatGroq(
                groq_api_key=GROQ_API_KEYS[0],
                model_name=GROQ_MODEL,
                temperature=0.0,
                request_timeout=GROQ_REQUEST_TIMEOUT_FAST,
                max_tokens=50,
            )
        else:
            self._fast_llm = None
46
+
47
    def _extract_search_query(
        self, question: str, chat_history: Optional[List[tuple]] = None
    ) -> str:
        """Condense the user's message into a short web-search query.

        Short messages with no reference words skip the LLM entirely. On any
        extraction failure, or when the extracted text fails sanity bounds,
        the raw question is returned so the search still proceeds.
        """
        if not self._fast_llm:
            return question

        q = question.strip()
        # Fast path: short message with nothing to resolve - use it verbatim.
        # NOTE(review): the space-padded tokens miss pronouns at the very start
        # or end of the message (e.g. "who is he") - confirm intended.
        if len(q) <= 60 and not any(p in q.lower() for p in (" it ", " that ", " him ", " her ", " them ")):
            return q

        try:
            t0 = time.perf_counter()
            history_context = ""
            if chat_history:
                # Last 3 turns, each side truncated to 200 chars.
                recent = chat_history[-3:]
                parts = []
                for h, a in recent:
                    parts.append(f"User: {h[:200]}")
                    parts.append(f"Assistant: {a[:200]}")
                history_context = "\n".join(parts)

            if history_context:
                full_prompt = (
                    f"{_QUERY_EXTRACTION_PROMPT}\n\n"
                    f"Recent conversation:\n{history_context}\n\n"
                    f"User's latest message: {question}\n\n"
                    f"Search query:"
                )
            else:
                full_prompt = (
                    f"{_QUERY_EXTRACTION_PROMPT}\n\n"
                    f"User's message: {question}\n\n"
                    f"Search query:"
                )

            response = self._fast_llm.invoke(full_prompt)
            # Strip whitespace plus any quoting the model wrapped the query in.
            extracted = response.content.strip().strip("'").strip('"')

            # Sanity bounds: discard empty or implausibly short/long extractions.
            if extracted and 3 <= len(extracted) <= 200:
                logger.info(
                    "[REALTIME] Query extraction: '%s' -> '%s' (%.3fs)",
                    question[:80], extracted[:80], time.perf_counter() - t0,
                )
                return extracted

            logger.warning("[REALTIME] Query extraction returned unusable result, using raw question")
            return question

        except Exception as e:
            logger.warning("[REALTIME] Query extraction failed (%s), using raw question", e)
            return question
98
+
99
    def search_tavily(self, query: str, num_results: int = 7) -> Tuple[str, Optional[dict]]:
        """Run a Tavily web search and format the results for the LLM prompt.

        Returns:
            (formatted_text, payload): formatted_text is the prompt-ready
            block ("--- WEB SEARCH RESULTS ... === END SEARCH RESULTS ===");
            payload is a JSON-safe dict (query, answer, truncated results) for
            emission to the client. Returns ("", None) when the client is
            missing, the query is blank, nothing was found, or any error
            occurred (errors are logged, never raised).
        """
        if not self.tavily_client:
            logger.warning("Tavily client not initialized. TAVILY_API_KEY not set.")
            return ("", None)
        if not query or not str(query).strip():
            return ("", None)

        try:
            t0 = time.perf_counter()

            response = with_retry(
                lambda: self.tavily_client.search(
                    query=query,
                    search_depth="fast",
                    max_results=num_results,
                    include_answer=True,
                    include_raw_content=False,
                ),
                max_retries=3,
                initial_delay=1.0,
            )

            results = response.get("results", [])
            ai_answer = response.get("answer", "")

            if not results and not ai_answer:
                logger.warning("No Tavily search results for query: %s", query)
                return ("", None)

            # Client-facing payload: content truncated to 500 chars per result.
            payload: Optional[dict] = {
                "query": query,
                "answer": ai_answer,
                "results": [
                    {
                        "title": r.get("title", "No title"),
                        "content": (r.get("content") or "")[:500],
                        "url": r.get("url", ""),
                        "score": round(float(r.get("score", 0)), 2),
                    }
                    for r in results[:num_results]
                ],
            }

            # Prompt-facing text: AI answer first, then per-source details.
            parts = [f"--- WEB SEARCH RESULTS FOR: {query} ---\n"]
            if ai_answer:
                parts.append(f"AI-SYNTHESIZED ANSWER (use this as your primary source):\n{ai_answer}\n")
            if results:
                parts.append("INDIVIDUAL SOURCES:")
                for i, result in enumerate(results[:num_results], 1):
                    title = result.get("title", "No title")
                    content = result.get("content", "")
                    url = result.get("url", "")
                    score = result.get("score", 0)
                    parts.append(f"\n[Source {i}] (relevance: {score:.2f})")
                    parts.append(f"Title: {title}")
                    if content:
                        parts.append(f"Content: {content}")
                    if url:
                        parts.append(f"URL: {url}")

            parts.append("\n=== END SEARCH RESULTS ===")
            formatted = "\n".join(parts)

            logger.info(
                "[TAVILY] %d results, AI answer: %s, formatted: %d chars (%.3fs)",
                len(results), "yes" if ai_answer else "no",
                len(formatted), time.perf_counter() - t0,
            )
            return (formatted, payload)

        except Exception as e:
            # Best-effort: a failed search must not break the chat turn.
            logger.error("Error performing Tavily search: %s", e)
            return ("", None)
172
+
173
    def get_response(self, question: str, chat_history: Optional[List[tuple]] = None, key_start_index: int = 0) -> str:
        """Blocking realtime completion: extract query, search, then answer.

        Web-search results (when any) are injected as an extra system part
        alongside the REALTIME_CHAT_ADDENDUM; with no results the LLM is still
        invoked and answers without search context.

        Raises:
            AllGroqApisFailedError: propagated unchanged when all keys fail.
            Exception: any other error, logged with traceback and re-raised.
        """
        try:
            search_query = self._extract_search_query(question, chat_history)
            logger.info("[REALTIME] Searching Tavily for: %s", search_query)

            formatted_results, _ = self.search_tavily(search_query, num_results=7)
            if formatted_results:
                logger.info("[REALTIME] Tavily returned results (length: %d chars)", len(formatted_results))
            else:
                logger.warning("[REALTIME] Tavily returned no results for: %s", search_query)

            # Escape braces so result text cannot break the prompt template.
            extra_parts = [escape_curly_braces(formatted_results)] if formatted_results else None
            prompt, messages = self._build_prompt_and_messages(
                question, chat_history,
                extra_system_parts=extra_parts,
                mode_addendum=REALTIME_CHAT_ADDENDUM,
            )

            t0 = time.perf_counter()
            response_content = self._invoke_llm(prompt, messages, question, key_start_index=key_start_index)
            logger.info("[TIMING] groq_api: %.3fs", time.perf_counter() - t0)
            logger.info(
                "[RESPONSE] Realtime chat | Length: %d chars | Preview: %.120s",
                len(response_content), response_content,
            )
            return response_content

        except AllGroqApisFailedError:
            raise
        except Exception as e:
            logger.error("Error in realtime get_response: %s", e, exc_info=True)
            raise
205
+
206
+ def prefetch_web_search(
207
+ self, question: str, chat_history: Optional[List[tuple]] = None
208
+ ) -> Tuple[str, Optional[dict]]:
209
+ try:
210
+ t0 = time.perf_counter()
211
+ search_query = self._extract_search_query(question, chat_history)
212
+ logger.info("[REALTIME] Pre-fetch: extracted query '%s' in %.3fs", search_query[:60], time.perf_counter() - t0)
213
+ formatted_results, payload = self.search_tavily(search_query, num_results=7)
214
+ if formatted_results:
215
+ logger.info("[REALTIME] Pre-fetch: Tavily returned %d chars in %.3fs total",
216
+ len(formatted_results), time.perf_counter() - t0)
217
+ return (formatted_results or "", payload)
218
+ except Exception as e:
219
+ logger.warning("[REALTIME] Pre-fetch failed: %s", e)
220
+ return ("", None)
221
+
222
+ def stream_response(self, question: str, chat_history: Optional[List[tuple]] = None, key_start_index: int = 0) -> Iterator[Any]:
223
+ try:
224
+ yield {"_activity": {"event": "extracting_query", "message": "Extracting search query..."}}
225
+ search_query = self._extract_search_query(question, chat_history)
226
+ logger.info("[REALTIME] Searching Tavily for: %s", search_query)
227
+ yield {"_activity": {"event": "searching_web", "query": search_query, "message": f"Searching web for: {search_query}"}}
228
+
229
+ formatted_results, payload = self.search_tavily(search_query, num_results=7)
230
+ num_results = len(payload.get("results", [])) if payload else 0
231
+ if formatted_results:
232
+ logger.info("[REALTIME] Tavily returned results (length: %d chars)", len(formatted_results))
233
+ yield {"_activity": {"event": "search_completed", "message": f"Search completed: {num_results} results, {len(formatted_results)} chars of context"}}
234
+ else:
235
+ logger.warning("[REALTIME] Tavily returned no results for: %s", search_query)
236
+ yield {"_activity": {"event": "search_completed", "message": "No search results found"}}
237
+
238
+ if payload:
239
+ yield {"_search_results": payload}
240
+
241
+ extra_parts = [escape_curly_braces(formatted_results)] if formatted_results else None
242
+ prompt, messages = self._build_prompt_and_messages(
243
+ question, chat_history,
244
+ extra_system_parts=extra_parts,
245
+ mode_addendum=REALTIME_CHAT_ADDENDUM,
246
+ )
247
+ yield from self._stream_llm(prompt, messages, question, key_start_index=key_start_index)
248
+ logger.info("[REALTIME] Stream completed for: %s", search_query)
249
+
250
+ except AllGroqApisFailedError:
251
+ raise
252
+ except Exception as e:
253
+ logger.error("Error in realtime stream_response: %s", e, exc_info=True)
254
+ raise
255
+
256
+ def stream_response_with_prefetched(
257
+ self,
258
+ question: str,
259
+ chat_history: Optional[List[tuple]] = None,
260
+ formatted_results: Optional[str] = None,
261
+ payload: Optional[dict] = None,
262
+ key_start_index: int = 0,
263
+ ) -> Iterator[Any]:
264
+ try:
265
+ extra_parts = [escape_curly_braces(formatted_results)] if formatted_results else None
266
+ prompt, messages = self._build_prompt_and_messages(
267
+ question, chat_history,
268
+ extra_system_parts=extra_parts,
269
+ mode_addendum=REALTIME_CHAT_ADDENDUM,
270
+ )
271
+ yield from self._stream_llm(prompt, messages, question, key_start_index=key_start_index)
272
+ logger.info("[REALTIME] Stream completed (pre-fetched results)")
273
+ except AllGroqApisFailedError:
274
+ raise
275
+ except Exception as e:
276
+ logger.error("Error in realtime stream_response_with_prefetched: %s", e, exc_info=True)
277
+ raise
app/services/vector_store.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import List, Optional
5
+
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from langchain_huggingface import HuggingFaceEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_core.documents import Document
10
+
11
+ from config import (
12
+ LEARNING_DATA_DIR,
13
+ CHATS_DATA_DIR,
14
+ VECTOR_STORE_DIR,
15
+ EMBEDDING_MODEL,
16
+ CHUNK_SIZE,
17
+ CHUNK_OVERLAP,
18
+ )
19
+
20
+ logger = logging.getLogger("J.A.R.V.I.S")
21
+
22
class VectorStoreService:
    """Builds and serves a FAISS similarity index over learning-data files and saved chats."""

    def __init__(self):
        # CPU-only embeddings keep the service deployable without a GPU.
        self.embeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            model_kwargs={"device": "cpu"},
        )
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP,
        )
        self.vector_store: Optional[FAISS] = None
        # Maps k -> retriever so repeated lookups reuse the same object.
        self._retriever_cache: dict = {}

    def load_learning_data(self) -> List[Document]:
        """Read every non-empty *.txt file in LEARNING_DATA_DIR into a Document."""
        docs: List[Document] = []
        for path in sorted(LEARNING_DATA_DIR.glob("*.txt")):
            try:
                text = path.read_text(encoding="utf-8").strip()
                if text:
                    docs.append(Document(page_content=text, metadata={"source": str(path.name)}))
                    logger.info("[VECTOR] Loaded learning data: %s (%d chars)", path.name, len(text))
            except Exception as e:
                logger.warning("Could not load learning data file %s: %s", path, e)
        logger.info("[VECTOR] Total learning data files loaded: %d", len(docs))
        return docs

    def load_chat_history(self) -> List[Document]:
        """Convert each saved chat JSON into one "User:/Assistant:" transcript Document."""
        docs: List[Document] = []
        for path in sorted(CHATS_DATA_DIR.glob("*.json")):
            try:
                with open(path, "r", encoding="utf-8") as f:
                    chat_data = json.load(f)

                messages = chat_data.get("messages", [])
                if not isinstance(messages, list):
                    # Malformed file: skip rather than crash the whole rebuild.
                    continue

                transcript_lines = []
                for msg in messages:
                    if not isinstance(msg, dict):
                        continue
                    role = msg.get("role") or "assistant"
                    body = msg.get("content") or ""
                    speaker = "User: " if role == "user" else "Assistant: "
                    transcript_lines.append(speaker + body)
                transcript = "\n".join(transcript_lines)

                if transcript.strip():
                    docs.append(Document(page_content=transcript, metadata={"source": f"chat_{path.stem}"}))
                    logger.info("[VECTOR] Loaded chat history: %s (%d messages)", path.name, len(messages))
            except Exception as e:
                logger.warning("Could not load chat history file %s: %s", path, e)
        logger.info("[VECTOR] Total chat history files loaded: %d", len(docs))
        return docs

    def create_vector_store(self) -> FAISS:
        """(Re)build the FAISS index from all learning + chat documents and persist it."""
        learning_docs = self.load_learning_data()
        chat_docs = self.load_chat_history()
        corpus = learning_docs + chat_docs
        logger.info("[VECTOR] Total documents to index: %d (learning: %d, chat: %d)",
                    len(corpus), len(learning_docs), len(chat_docs))

        if corpus:
            chunks = self.text_splitter.split_documents(corpus)
            logger.info("[VECTOR] Split into %d chunks (chunk_size=%d, overlap=%d)",
                        len(chunks), CHUNK_SIZE, CHUNK_OVERLAP)
            self.vector_store = FAISS.from_documents(chunks, self.embeddings)
            logger.info("[VECTOR] FAISS index built successfully with %d vectors", len(chunks))
        else:
            # Empty corpus: index a placeholder so similarity search never fails.
            self.vector_store = FAISS.from_texts(["No data available yet."], self.embeddings)
            logger.info("[VECTOR] No documents found, created placeholder index")

        # Old retrievers point at the previous index; drop them.
        self._retriever_cache.clear()
        self.save_vector_store()
        return self.vector_store

    def save_vector_store(self):
        """Persist the current index to VECTOR_STORE_DIR; failures are logged, not raised."""
        if self.vector_store:
            try:
                self.vector_store.save_local(str(VECTOR_STORE_DIR))
            except Exception as e:
                logger.error("Failed to save vector store to disk: %s", e)

    def get_retriever(self, k: int = 10):
        """Return (and cache) a retriever performing top-k similarity search."""
        if not self.vector_store:
            raise RuntimeError("Vector store not initialized. This should not happen.")
        if k not in self._retriever_cache:
            self._retriever_cache[k] = self.vector_store.as_retriever(search_kwargs={"k": k})
        return self._retriever_cache[k]
app/utils/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ UTILITIES PACKAGE
3
+ =================
4
+
5
+ Helpers used by the services (no HTTP, no business logic):
6
+
7
+ time_info - get_time_information(): returns a string with current date/time for the LLM prompt.
8
+ retry - with_retry(fn): calls fn(); on failure retries with exponential backoff (Groq/Tavily).
+ key_rotation - get_next_key_pair(): thread-safe round-robin choice of API-key indices.
9
+ """
app/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (449 Bytes). View file
 
app/utils/__pycache__/key_rotation.cpython-312.pyc ADDED
Binary file (979 Bytes). View file
 
app/utils/__pycache__/retry.cpython-312.pyc ADDED
Binary file (1.29 kB). View file
 
app/utils/__pycache__/time_info.cpython-312.pyc ADDED
Binary file (1.3 kB). View file
 
app/utils/key_rotation.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ from typing import Tuple, Optional
3
+
4
+ _counter = 0
5
+ _lock = threading.Lock()
6
+
7
+ def get_next_key_pair(n_keys: int, need_brain: bool = True) -> Tuple[Optional[int], int]:
8
+ global _counter
9
+ if n_keys <= 0:
10
+ return (None, 0)
11
+ with _lock:
12
+ if need_brain:
13
+ if n_keys >= 2:
14
+ brain = _counter % n_keys
15
+ chat = (_counter + 1) % n_keys
16
+ _counter += 2
17
+ return (brain, chat)
18
+ else:
19
+ _counter += 1
20
+ return (0, 0)
21
+ else:
22
+ chat = _counter % n_keys
23
+ _counter += 1
24
+ return (None, chat)
app/utils/retry.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import time
from typing import Callable, TypeVar

logger = logging.getLogger("J.A.R.V.I.S")

T = TypeVar("T")


def with_retry(
    fn: Callable[[], T],
    max_retries: int = 3,
    initial_delay: float = 1.0,
) -> T:
    """Call ``fn()`` and retry on any exception with exponential backoff.

    Args:
        fn: Zero-argument callable to invoke (bind arguments with functools.partial).
        max_retries: Total number of attempts. Values <= 0 mean "call once, no retry".
        initial_delay: Seconds slept before the second attempt; doubles each retry.

    Returns:
        Whatever ``fn()`` returns on the first successful attempt.

    Raises:
        Exception: the last exception raised by ``fn()`` once all attempts fail.
    """
    if max_retries <= 0:
        # Degenerate configuration: behave like a plain call.
        return fn()

    delay = initial_delay
    for attempt in range(max_retries):
        try:
            return fn()
        except Exception as e:
            if attempt == max_retries - 1:
                # Final attempt: propagate the original exception unchanged.
                raise
            logger.warning(
                "Attempt %s/%s failed (%s). Retrying in %.1fs: %s",
                attempt + 1,
                max_retries,
                getattr(fn, "__name__", "call"),
                delay,
                e,
            )
            time.sleep(delay)
            delay *= 2

    # Fix: the original ended with `raise last_exception`, which is unreachable
    # (the last attempt re-raises inside the loop) and would raise None if ever
    # reached. Keep an explicit guard for type-checkers instead of dead state.
    raise AssertionError("unreachable")  # pragma: no cover
app/utils/time_info.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ TIME INFORMATION UTILITY
3
+ ========================
4
+
5
+ Returns a short, readable string with the current date and time. This is
6
+ injected into the system prompt so the LLM can answer "what day is it?"
7
+ and similar questions. Called by both GroqService and RealtimeGroqService.
8
+ """
9
+
10
+ import datetime
11
+
12
+ def get_time_information() -> str:
13
+ """Return a few lines of text: day name, date, month, year, and time (24h)."""
14
+ now = datetime.datetime.now()
15
+ return (
16
+ f"Current Real-time Information:\n"
17
+ f"Day: {now.strftime('%A')}\n" # e.g. Monday
18
+ f"Date: {now.strftime('%d')}\n" # e.g. 05
19
+ f"Month: {now.strftime('%B')}\n" # e.g. February
20
+ f"Year: {now.strftime('%Y')}\n" # e.g. 2026
21
+ f"Time: {now.strftime('%H')} hours, {now.strftime('%M')} minutes, {now.strftime('%S')} seconds\n"
22
+ )