Spaces:

Fayza38
/

intervision-question_answer-api

Sleeping

App Files Files Community

Fayza38 committed 16 days ago

Commit

a8cac7f

verified ·

1 Parent(s): 1bb94e9

Update behavioral and technical id

Browse files

Files changed (1) hide show

main.py +267 -333

main.py CHANGED Viewed

@@ -1,333 +1,267 @@
-# =========================================
-# 1. IMPORTS
-# =========================================
-import asyncio
-import os
-import json
-import uuid
-import cloudinary
-import cloudinary.uploader
-import firebase_admin
-from firebase_admin import credentials, firestore
-from fastapi import FastAPI, HTTPException, BackgroundTasks
-from pydantic import BaseModel
-from gradio_client import Client
-from google.cloud.firestore_v1.base_query import FieldFilter
-import edge_tts
-from dotenv import load_dotenv
-# =========================================
-# 2. INITIALIZATIONS
-# =========================================
-if not firebase_admin._apps:
-    fb_json = os.getenv("FIREBASE_JSON")
-    if fb_json:
-        cred_dict = json.loads(fb_json)
-        cred = credentials.Certificate(cred_dict)
-    else:
-        cred = credentials.Certificate("serviceAccountKey.json")
-    firebase_admin.initialize_app(cred)
-db = firestore.client()
-# Load environment variables
-load_dotenv()
-# Cloudinary Configuration
-cloudinary.config(
-    cloud_name=os.getenv("CLOUD_NAME"),
-    api_key=os.getenv("API_KEY"),
-    api_secret=os.getenv("API_SECRET"),
-    secure=True
-)
-app = FastAPI(title="AI Question Service")
-HF_SPACE = "Fayza38/Question_and_answer_model"
-client = None
-# =========================================
-# 3. MODELS & CONSTANTS
-# =========================================
-TECH_CATEGORIES = {0: "Security",
-                    1: "BackEnd",
-                    2: "Networking",
-                    3: "FrontEnd",
-                    4: "DataEngineering",
-                    5: "WebDevelopment",
-                    6: "FullStack",
-                    7: "VersionControl",
-                    8: "SystemDesign",
-                    9: "MachineLearning",
-                    10: "LanguagesAndFrameworks",
-                    11: "DatabaseSystems",
-                    12: "ArtificialIntelligence",
-                    13: "SoftwareTesting",
-                    14: "DistributedSystems",
-                    15: "DevOps",
-                    16: "LowLevelSystems",
-                    17: "DatabaseAndSql",
-                    18: "GeneralProgramming",
-                    19: "DataStructures",
-                    20: "Algorithms"}
-DIFFICULTY_MAP = {0: "Easy", 1: "Intermediate", 2: "Hard"}
-SESSION_TYPE_MAP = {0: "Technical", 1: "Behavioral"}
-class GenerateSessionRequest(BaseModel):
-    sessionId: str
-    sessionType: int
-    difficultyLevel: int = 0
-    trackName: int
-class CleanupRequest(BaseModel):
-    audioUrls: list[str]
-# =========================================
-# 4. STARTUP EVENT
-# =========================================
-@app.on_event("startup")
-async def startup_event():
-    global client
-    max_retries = 5
-    retry_delay = 10
-    print("Connecting to Hugging Face Space...")
-    for i in range(max_retries):
-        try:
-            client = Client(HF_SPACE)
-            print("Connected Successfully!")
-            break
-        except Exception as e:
-            print(f"Connection attempt {i+1} failed. Retrying in {retry_delay}s...")
-            if i < max_retries - 1: await asyncio.sleep(retry_delay)
-# =========================================
-# 5. HELPERS
-# =========================================
-async def generate_audio(text, filename):
-    try:
-        # Rate is set to -15% to make the voice slightly slower and clearer
-        communicate = edge_tts.Communicate(text, "en-US-GuyNeural", rate="-15%")
-        await communicate.save(filename)
-        # Upload to Cloudinary
-        upload_result = cloudinary.uploader.upload(
-            filename,
-            resource_type="video",
-            folder="interview_audio"
-        )
-        if os.path.exists(filename): os.remove(filename)
-        return upload_result["secure_url"]
-    except Exception as e:
-        print(f"Audio Generation Error: {e}")
-        if os.path.exists(filename): os.remove(filename)
-        return None
-async def safe_generate(prompt, retries=3):
-    if client is None: raise Exception("Gradio Client not initialized")
-    for attempt in range(retries):
-        try:
-            loop = asyncio.get_running_loop()
-            return await loop.run_in_executor(None, lambda: client.predict(prompt=prompt, api_name="/generate_questions"))
-        except Exception as e:
-            if attempt == retries - 1: raise e
-            await asyncio.sleep(2)
-def parse_question_output(raw_output: str):
-    if not raw_output: return None, None
-    text = raw_output.split("assistant")[-1].strip() if "assistant" in raw_output else raw_output
-    if "Q:" in text and "A:" in text:
-        try:
-            parts = text.split("A:")
-            q = parts[0].replace("Q:", "").strip()
-            a = parts[1].split("<|im_end|>")[0].strip()
-            return q, a
-        except: return None, None
-    return None, None
-# =========================================
-# 6. REFILL & PREFILL LOGIC
-# =========================================
-async def refill_specific_pool(track_id: int, difficulty: int, count: int, session_type: int = 0):
-    global client
-    while client is None: await asyncio.sleep(5)
-    # Technical (0) vs Behavioral (1)
-    if session_type == 1:
-        prompt = "Generate ONE unique behavioral interview question (soft skills, situational). Format: Q: [Question] A: [Answer]"
-        track_text = "Behavioral"
-        level_text = "General"
-    else:
-        track_text = TECH_CATEGORIES.get(track_id)
-        level_text = DIFFICULTY_MAP.get(difficulty)
-        prompt = f"Generate ONE unique {track_text} interview question for {level_text} level. Format: Q: [Question] A: [Answer]"
-    success_count = 0
-    while success_count < count:
-        try:
-            raw_output = await safe_generate(prompt)
-            q_text, a_text = parse_question_output(raw_output)
-            if q_text and a_text:
-                filename = f"{uuid.uuid4()}.mp3"
-                audio_url = await generate_audio(q_text, filename)
-                if audio_url:
-                    db.collection("questions_pool").add({
-                        "session_type": session_type,
-                        "track_id": track_id if session_type == 0 else -1,
-                        "difficulty": difficulty if session_type == 0 else 0,
-                        "questionText": q_text,
-                        "questionIdealAnswer": a_text,
-                        "audio_url": audio_url,
-                        "created_at": firestore.SERVER_TIMESTAMP
-                    })
-                    success_count += 1
-                    print(f"[{success_count}/{count}] Refilled: {track_text}")
-                    await asyncio.sleep(2)
-        except Exception as e:
-            print(f"Error in refill: {e}")
-            await asyncio.sleep(5)
-# =========================================
-# 7. ENDPOINTS
-# =========================================
-@app.post("/generate-session")
-async def generate_session(request: GenerateSessionRequest, background_tasks: BackgroundTasks):
-    t_id, diff = request.trackName, request.difficultyLevel
-    s_type = request.sessionType # 0: Technical, 1: Behavioral
-    # Query based on the new session types (0 or 1)
-    query = db.collection("questions_pool").where(filter=FieldFilter("session_type", "==", s_type))
-    if s_type == 0: # Technical
-        query = query.where(filter=FieldFilter("track_id", "==", t_id)) \
-                    .where(filter=FieldFilter("difficulty", "==", diff))
-    docs_query = query.limit(10).get()
-    final_questions = []
-    for index, doc in enumerate(docs_query, start=1):
-        data = doc.to_dict()
-        final_questions.append({
-            "question_id": index,
-            "text": data["questionText"],
-            "expected_answer": data["questionIdealAnswer"],
-            "audio_url": data.get("audio_url", "")
-        })
-        # Delete after fetching to ensure questions are unique for next users
-        db.collection("questions_pool").document(doc.id).delete()
-    # Maintenance task to keep the pool full
-    async def maintain_stock():
-        agg_query = query.count()
-        current_count = agg_query.get()[0][0].value
-        target = 50
-        if current_count < target:
-            await refill_specific_pool(t_id, diff, target - current_count, session_type=s_type)
-    background_tasks.add_task(maintain_stock)
-    if not final_questions:
-        raise HTTPException(status_code=503, detail="Pool empty for this type.")
-    return {"session_id": request.sessionId, "questions": final_questions}
-@app.get("/system-cleanup")
-async def system_cleanup(background_tasks: BackgroundTasks):
-    """Scan and delete all questions with missing or invalid audio URLs"""
-    def run_cleanup():
-        print("Starting System Cleanup...")
-        # Get all documents in the pool
-        docs = db.collection("questions_pool").get()
-        deleted_count = 0
-        for doc in docs:
-            data = doc.to_dict()
-            # Check if audio_url is missing, None, or empty string
-            if not data.get("audio_url") or data.get("audio_url") == "":
-                db.collection("questions_pool").document(doc.id).delete()
-                deleted_count += 1
-        print(f"Cleanup finished! Deleted {deleted_count} broken questions.")
-    background_tasks.add_task(run_cleanup)
-    return {"message": "Cleanup started in background. Check your console/logs."}
-@app.post("/cleanup-audio")
-async def cleanup_audio(request: CleanupRequest, background_tasks: BackgroundTasks):
-    def delete_job(urls):
-        for url in urls:
-            try:
-                public_id = "interview_audio/" + url.split('/')[-1].split('.')[0]
-                cloudinary.uploader.destroy(public_id, resource_type="video")
-                print(f"Deleted: {public_id}")
-            except Exception: pass
-    background_tasks.add_task(delete_job, request.audioUrls)
-    return {"message": "Cleanup started"}
-# @app.get("/trigger-full-prefill")
-# async def trigger_full_prefill(background_tasks: BackgroundTasks):
-#     """Prefills 30 questions for every track and every difficulty level"""
-#     async def full_prefill_task():
-#         for t_id in TECH_CATEGORIES.keys():
-#             for diff in DIFFICULTY_MAP.keys():
-#                 print(f"Starting full prefill for Track {t_id}, Level {diff}")
-#                 await refill_specific_pool(t_id, diff, 30)
-#     background_tasks.add_task(full_prefill_task)
-#     return {"message": "Full system prefill started in background (30 questions per track/level)"}
-#?##############################################################################
-# @app.get("/trigger-behavioral-prefill")
-# async def trigger_behavioral_prefill(background_tasks: BackgroundTasks):
-#     """Prefills 30 Behavioral questions (No track or difficulty needed)"""
-#     async def run_behavioral_task():
-#         print("Starting Behavioral questions prefill...")
-#         await refill_specific_pool(track_id=0, difficulty=0, count=30, session_type=2)
-#         print("Finished prefilling 30 Behavioral questions!")
-#     background_tasks.add_task(run_behavioral_task)
-#     return {"message": "Behavioral prefill (30 questions) started in background."}
-@app.get("/health")
-async def health(): return {"status": "running", "hf_connected": client is not None}
-#?##########################################################################
-# @app.get("/final-migration-fix")
-# async def final_migration_fix(background_tasks: BackgroundTasks):
-#     def run_fix():
-#         print("🔄 Starting Final Data Fix...")
-#         docs = db.collection("questions_pool").get()
-#         updated_count = 0
-#         for doc in docs:
-#             data = doc.to_dict()
-#             updates = {}
-#             # 1. Fix session_type (Technical: 0, Behavioral: 1)
-#             # If it was 1 (old scheme) make it 0; if it was 2 (old scheme) make it 1
-#             curr_type = data.get("session_type")
-#             if curr_type == 1: updates["session_type"] = 0
-#             elif curr_type == 2: updates["session_type"] = 1
-#             # 2. Fix difficulty (Easy: 0, Intermediate: 1, Hard: 2)
-#             # Old questions used 1, 2 and 3, so subtract 1 from each
-#             curr_diff = data.get("difficulty")
-#             if curr_diff in [1, 2, 3]:
-#                 updates["difficulty"] = curr_diff - 1
-#             if updates:
-#                 db.collection("questions_pool").document(doc.id).update(updates)
-#                 updated_count += 1
-#         print(f"✅ Final Fix Done! Updated {updated_count} questions.")
-#     background_tasks.add_task(run_fix)
-#     return {"message": "Final migration started. Your pool will be ready in a minute!"}

+# =========================================
+# 1. IMPORTS
+# =========================================
+import asyncio
+import os
+import json
+import uuid
+import cloudinary
+import cloudinary.uploader
+import firebase_admin
+from firebase_admin import credentials, firestore
+from fastapi import FastAPI, HTTPException, BackgroundTasks
+from pydantic import BaseModel
+from gradio_client import Client
+from google.cloud.firestore_v1.base_query import FieldFilter
+import edge_tts
+from typing import Optional
+from dotenv import load_dotenv
+from contextlib import asynccontextmanager
+# =========================================
+# 2. INITIALIZATIONS & CONFIG
+# =========================================
+# load_dotenv() runs before every os.getenv() call below so that values it loads are visible to them.
+load_dotenv()
+# Initialise the Firebase Admin SDK only when no app has been registered yet.
+if not firebase_admin._apps:
+    fb_json = os.getenv("FIREBASE_JSON")
+    if fb_json:
+        # Credentials supplied inline as a JSON string in the FIREBASE_JSON environment variable.
+        cred_dict = json.loads(fb_json)
+        cred = credentials.Certificate(cred_dict)
+    else:
+        # No FIREBASE_JSON set: fall back to a key file named serviceAccountKey.json.
+        cred = credentials.Certificate("serviceAccountKey.json")
+    firebase_admin.initialize_app(cred)
+# Firestore handle shared by every endpoint and background task in this module.
+db = firestore.client()
+# Cloudinary credentials come from the CLOUD_NAME / API_KEY / API_SECRET environment variables.
+cloudinary.config(
+    cloud_name=os.getenv("CLOUD_NAME"),
+    api_key=os.getenv("API_KEY"),
+    api_secret=os.getenv("API_SECRET"),
+    secure=True
+)
+# Hugging Face Space that hosts the question-generation model.
+HF_SPACE = "Fayza38/Question_and_answer_model"
+# Gradio client for HF_SPACE; remains None until the lifespan handler connects successfully.
+client = None
+# =========================================
+# 3. MODELS & CONSTANTS
+# =========================================
+# Integer track id -> track name. The name is interpolated into the technical generation prompt.
+TECH_CATEGORIES = {
+    0: "Security", 1: "BackEnd", 2: "Networking", 3: "FrontEnd",
+    4: "DataEngineering", 5: "WebDevelopment", 6: "FullStack",
+    7: "VersionControl", 8: "SystemDesign", 9: "MachineLearning",
+    10: "LanguagesAndFrameworks", 11: "DatabaseSystems",
+    12: "ArtificialIntelligence", 13: "SoftwareTesting",
+    14: "DistributedSystems", 15: "DevOps", 16: "LowLevelSystems",
+    17: "DatabaseAndSql", 18: "GeneralProgramming",
+    19: "DataStructures", 20: "Algorithms"
+}
+# Integer difficulty level -> label interpolated into the technical generation prompt.
+DIFFICULTY_MAP = {0: "Easy", 1: "Intermediate", 2: "Hard"}
+class GenerateSessionRequest(BaseModel):
+    """Request body for POST /generate-session."""
+    sessionId: str             # echoed back as "session_id" in the response
+    sessionType: int           # 0: Behavioral, 1: Technical
+    difficultyLevel: int = 0   # key of DIFFICULTY_MAP; only used as a filter for technical sessions
+    trackName: Optional[int] = None  # key of TECH_CATEGORIES; the endpoint rejects technical requests without it
+class CleanupRequest(BaseModel):
+    """Request body for POST /cleanup-audio."""
+    audioUrls: list[str]  # URLs of uploaded audio files whose Cloudinary assets should be deleted
+# =========================================
+# 4. BACKGROUND TASKS (Auto-Cleaner)
+# =========================================
+async def auto_clean_invalid_questions():
+    """Periodically delete pool questions that have no audio URL.
+
+    Runs until cancelled. Every 10 minutes it reads the whole ``questions_pool``
+    collection and deletes each document whose ``audio_url`` is missing, ``None``
+    or empty. The Firestore calls used here are synchronous, so each scan is
+    handed to the default executor instead of running on the event loop, where it
+    would stall every in-flight request. Errors are logged and the loop simply
+    continues with the next cycle.
+    """
+    def scan_and_delete():
+        # Blocking Firestore work: only ever called through run_in_executor below.
+        removed = 0
+        for doc in db.collection("questions_pool").get():
+            if not doc.to_dict().get("audio_url"):
+                db.collection("questions_pool").document(doc.id).delete()
+                removed += 1
+        return removed
+    while True:
+        try:
+            print("[Auto-Cleaner] Scanning for broken questions...")
+            loop = asyncio.get_running_loop()
+            deleted_count = await loop.run_in_executor(None, scan_and_delete)
+            if deleted_count > 0:
+                print(f"[Auto-Cleaner] Removed {deleted_count} broken questions.")
+        except Exception as e:
+            print(f"[Auto-Cleaner] Error: {e}")
+        await asyncio.sleep(600) # Scan every 10 minutes
+# =========================================
+# 5. LIFESPAN MANAGEMENT
+# =========================================
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan: connect to the Hugging Face Space and run the auto-cleaner.
+
+    Startup builds the Gradio ``Client`` in the default executor, so a slow
+    connection does not block the event loop, and stores it in the module-level
+    ``client``. A failed connection is logged and leaves ``client`` as ``None``.
+    The auto-cleaner is started either way because it only talks to Firestore.
+    Shutdown cancels the cleaner task and waits for it to finish unwinding.
+    """
+    global client
+    print("Connecting to Hugging Face Space...")
+    try:
+        loop = asyncio.get_running_loop()
+        client = await loop.run_in_executor(None, lambda: Client(HF_SPACE))
+        print("Connected Successfully!")
+    except Exception as e:
+        print(f"Startup Connection failed: {e}")
+    # Keep a strong reference: the event loop holds tasks only weakly, so an
+    # unreferenced task may be garbage-collected before it completes.
+    cleaner_task = asyncio.create_task(auto_clean_invalid_questions())
+    try:
+        yield
+    finally:
+        print("Shutting down Intervision Service...")
+        cleaner_task.cancel()
+        try:
+            await cleaner_task
+        except asyncio.CancelledError:
+            pass
+app = FastAPI(title="Intervision AI Question Service", lifespan=lifespan)
+# =========================================
+# 6. HELPERS
+# =========================================
+async def generate_audio(text, filename):
+    """Synthesise *text* to an audio file, upload it to Cloudinary and return its URL.
+
+    Args:
+        text: The text to speak.
+        filename: Local path used as a temporary file for the synthesised audio.
+
+    Returns:
+        The ``secure_url`` reported by Cloudinary, or ``None`` if synthesis or
+        upload failed (the error is logged).
+
+    The temporary file is removed on every path. The Cloudinary upload is a
+    blocking call, so it is run in the default executor to keep the event loop
+    responsive.
+    """
+    try:
+        communicate = edge_tts.Communicate(text, "en-US-GuyNeural", rate="-15%")
+        await communicate.save(filename)
+        loop = asyncio.get_running_loop()
+        upload_result = await loop.run_in_executor(
+            None,
+            lambda: cloudinary.uploader.upload(
+                filename, resource_type="video", folder="interview_audio"
+            ),
+        )
+        return upload_result["secure_url"]
+    except Exception as e:
+        print(f"Audio Error: {e}")
+        return None
+    finally:
+        # The file may not exist if synthesis failed before anything was written.
+        if os.path.exists(filename): os.remove(filename)
+async def safe_generate(prompt, retries=3):
+    """Call the Space's ``/generate_questions`` endpoint, retrying on failure.
+
+    Args:
+        prompt: Prompt text forwarded to the model.
+        retries: Maximum number of attempts; values below 1 are treated as 1 so
+            the call never silently returns ``None`` without trying.
+
+    Returns:
+        Whatever ``client.predict`` returns for the prompt.
+
+    Raises:
+        RuntimeError: If the Gradio client has not been connected yet.
+        Exception: The error from the final attempt, re-raised unchanged.
+    """
+    if client is None: raise RuntimeError("Gradio Client not initialized")
+    loop = asyncio.get_running_loop()
+    attempts = max(1, retries)
+    for attempt in range(attempts):
+        try:
+            # client.predict is a blocking call, so it runs in the default executor.
+            return await loop.run_in_executor(None, lambda: client.predict(prompt=prompt, api_name="/generate_questions"))
+        except Exception:
+            # Bare raise keeps the original traceback intact.
+            if attempt == attempts - 1: raise
+            await asyncio.sleep(2)
+def parse_question_output(raw_output: str) -> tuple[Optional[str], Optional[str]]:
+    """Extract the question and answer from raw model output.
+
+    The expected shape is ``Q: <question> A: <answer>``, optionally followed by
+    ``<|im_end|>``. When the output contains the word ``assistant``, only the
+    text after its last occurrence is parsed.
+
+    Args:
+        raw_output: Text returned by the model; may be empty or ``None``.
+
+    Returns:
+        ``(question, answer)`` with surrounding whitespace stripped, or
+        ``(None, None)`` when no ``Q:`` marker followed by an ``A:`` marker is
+        found.
+    """
+    if not raw_output: return None, None
+    # NOTE(review): a question that itself contains the word "assistant" would be cut here.
+    text = raw_output.split("assistant")[-1].strip() if "assistant" in raw_output else raw_output
+    q_start = text.find("Q:")
+    if q_start == -1: return None, None
+    # Only the first "A:" after the question marker separates question from answer,
+    # so an answer that contains "A:" itself (e.g. "CIA:") is kept whole.
+    a_start = text.find("A:", q_start + 2)
+    if a_start == -1: return None, None
+    q = text[q_start + 2:a_start].strip()
+    a = text[a_start + 2:].split("<|im_end|>")[0].strip()
+    return q, a
+async def refill_specific_pool(track_id: int, difficulty: int, count: int, session_type: int = 1):
+    """Generate up to *count* new questions and store them in ``questions_pool``.
+
+    Args:
+        track_id: Key of ``TECH_CATEGORIES``; ignored for behavioral sessions.
+        difficulty: Key of ``DIFFICULTY_MAP``; ignored for behavioral sessions.
+        count: Number of questions to add.
+        session_type: 0 for behavioral, anything else for technical.
+
+    Each stored question needs a parsed question/answer pair and an uploaded
+    audio URL. The function gives up, with a log line, instead of spinning
+    forever when the Gradio client never connects (about 5 minutes), when the
+    track or difficulty is unknown, or after 10 consecutive failed attempts.
+    """
+    max_client_waits = 60  # 60 waits of 5 s each
+    max_consecutive_failures = 10
+    waits = 0
+    while client is None:
+        if waits >= max_client_waits:
+            print("Refill aborted: Gradio Client not initialized")
+            return
+        waits += 1
+        await asyncio.sleep(5)
+    if session_type == 0:
+        prompt = ("Generate ONE simple Behavioral interview question for a fresh graduate. "
+                "Focus on soft skills like teamwork or leadership. Strictly NO technical questions. "
+                "Format: Q: [Question] A: [Answer]")
+        track_text = "Behavioral"
+    else:
+        track_text = TECH_CATEGORIES.get(track_id)
+        level_text = DIFFICULTY_MAP.get(difficulty)
+        if track_text is None or level_text is None:
+            # An unknown id would otherwise put the literal word "None" into the prompt.
+            print(f"Refill aborted: unknown track_id={track_id} or difficulty={difficulty}")
+            return
+        prompt = f"Generate ONE unique {track_text} interview question for {level_text} level. Format: Q: [Question] A: [Answer]"
+    success_count = 0
+    consecutive_failures = 0
+    while success_count < count and consecutive_failures < max_consecutive_failures:
+        stored = False
+        try:
+            raw_output = await safe_generate(prompt)
+            q_text, a_text = parse_question_output(raw_output)
+            if q_text and a_text:
+                filename = f"{uuid.uuid4()}.mp3"
+                audio_url = await generate_audio(q_text, filename)
+                if audio_url:
+                    db.collection("questions_pool").add({
+                        "session_type": session_type,
+                        "track_id": track_id if session_type == 1 else -1,
+                        "difficulty": difficulty if session_type == 1 else 0,
+                        "questionText": q_text,
+                        "questionIdealAnswer": a_text,
+                        "audio_url": audio_url,
+                        "created_at": firestore.SERVER_TIMESTAMP
+                    })
+                    stored = True
+        except Exception as e:
+            print(f"Refill error: {e}")
+        if stored:
+            success_count += 1
+            consecutive_failures = 0
+            print(f"Refilled {success_count}/{count} for {track_text}")
+            await asyncio.sleep(3)
+        else:
+            # Unparseable output and failed audio also count, and back off before retrying.
+            consecutive_failures += 1
+            await asyncio.sleep(5)
+    if success_count < count:
+        print(f"Refill stopped early at {success_count}/{count} for {track_text}")
+# =========================================
+# 7. MAIN ENDPOINTS
+# =========================================
+@app.post("/generate-session")
+async def generate_session(request: GenerateSessionRequest, background_tasks: BackgroundTasks):
+    """Hand out up to 10 pooled questions for a session and schedule a pool refill.
+
+    Technical sessions (``sessionType`` 1) are filtered by track and difficulty;
+    behavioral sessions (0) are filtered by session type only. Every question
+    returned is deleted from the pool so it is not served again, and a
+    background task tops the matching pool back up to 50.
+
+    Returns:
+        ``{"session_id", "questions"}`` on success; a 400 for an invalid session
+        type, track or difficulty; a 503 with the usual ``{"detail": ...}`` body
+        when the pool has no matching questions.
+    """
+    # Local import: only the empty-pool path needs it.
+    from fastapi.responses import JSONResponse
+    s_type, diff, t_id = request.sessionType, request.difficultyLevel, request.trackName
+    if s_type not in (0, 1):
+        raise HTTPException(status_code=400, detail="sessionType must be 0 (behavioral) or 1 (technical).")
+    query = db.collection("questions_pool").where(filter=FieldFilter("session_type", "==", s_type))
+    if s_type == 1: # Technical
+        if t_id is None: raise HTTPException(status_code=400, detail="trackName required for technical.")
+        if t_id not in TECH_CATEGORIES or diff not in DIFFICULTY_MAP:
+            # Unknown ids can never match and would make the refill build a broken prompt.
+            raise HTTPException(status_code=400, detail="Unknown trackName or difficultyLevel.")
+        query = query.where(filter=FieldFilter("track_id", "==", t_id)).where(filter=FieldFilter("difficulty", "==", diff))
+    # NOTE(review): read-then-delete is not atomic, so two concurrent requests can
+    # receive the same questions; a transaction would be needed to rule that out.
+    docs_query = query.limit(10).get()
+    final_questions = []
+    for doc in docs_query:
+        data = doc.to_dict()
+        q_text, a_text = data.get("questionText"), data.get("questionIdealAnswer")
+        if q_text and a_text:
+            final_questions.append({
+                "question_id": len(final_questions) + 1, "text": q_text,
+                "expected_answer": a_text, "audio_url": data.get("audio_url", "")
+            })
+        # Served and malformed documents are both removed from the pool.
+        db.collection("questions_pool").document(doc.id).delete()
+    async def maintain_stock():
+        snap = query.count().get()
+        current = snap[0][0].value
+        if current < 50:
+            await refill_specific_pool(t_id if s_type == 1 else -1, diff, 50 - current, session_type=s_type)
+    background_tasks.add_task(maintain_stock)
+    if not final_questions:
+        # Return the 503 instead of raising it: background tasks are only run for a
+        # returned response, and an empty pool is exactly when the refill must run.
+        return JSONResponse(
+            status_code=503,
+            content={"detail": "Question pool is currently empty."},
+            background=background_tasks,
+        )
+    return {"session_id": request.sessionId, "questions": final_questions}
+@app.post("/cleanup-audio")
+async def cleanup_audio(request: CleanupRequest, background_tasks: BackgroundTasks):
+    """Schedule deletion of the given Cloudinary audio assets and return immediately.
+
+    For each URL, the last path segment up to its first dot is taken as the
+    asset name inside the ``interview_audio`` folder. Deletion is best-effort:
+    a failure on one URL is logged and the remaining URLs are still processed.
+    """
+    def delete_job(urls):
+        for url in urls:
+            try:
+                public_id = "interview_audio/" + url.split('/')[-1].split('.')[0]
+                cloudinary.uploader.destroy(public_id, resource_type="video")
+            except Exception as e:
+                # Log rather than swallow silently so failed deletions can be diagnosed.
+                print(f"Cleanup failed for {url}: {e}")
+    background_tasks.add_task(delete_job, request.audioUrls)
+    return {"message": "Cleanup started"}
+@app.get("/system-cleanup")
+async def system_cleanup(background_tasks: BackgroundTasks):
+    """Schedule a one-off scan that deletes pool questions without an audio URL.
+
+    The scan runs as a background task after the response is sent. It reads the
+    whole ``questions_pool`` collection and deletes every document whose
+    ``audio_url`` is missing, ``None`` or empty. URLs that are present are not
+    checked for validity.
+    """
+    def run_cleanup():
+        print("Starting System Cleanup...")
+        deleted_count = 0
+        for doc in db.collection("questions_pool").get():
+            # Missing, None and "" are all falsy, so one truthiness test covers them.
+            if not doc.to_dict().get("audio_url"):
+                db.collection("questions_pool").document(doc.id).delete()
+                deleted_count += 1
+        print(f"Cleanup finished! Deleted {deleted_count} broken questions.")
+    background_tasks.add_task(run_cleanup)
+    return {"message": "Cleanup started in background. Check your console/logs."}
+@app.get("/health")
+async def health():
+    """Report that the service is up and whether the Gradio client has been created."""
+    hf_connected = client is not None
+    return {"status": "active", "hf_connected": hf_connected}
+@app.get("/")
+async def root():
+    """Return a static banner identifying the service."""
+    banner = {"app": "Intervision AI Service","Status": "Running.."}
+    return banner
+# Direct-run entry point: serve the app with uvicorn on all interfaces, port 8000.
+if __name__ == "__main__":
+    import uvicorn
+    # NOTE(review): reload=True looks like a development setting - confirm it is wanted when deployed.
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)