Commit 5941cd9 · committed by GitHub Actions · Parent(s): d1df4ad
Deploy a9a1f6e
Files changed:
- app/core/config.py +0 -8
- app/main.py +1 -32
- app/pipeline/graph.py +1 -1
- app/pipeline/nodes/log_eval.py +56 -44
- app/services/conversation_store.py +1 -78
- app/services/loki_sink.py +119 -0
- tests/test_log_eval_privacy.py +2 -3
- tests/test_loki_sink.py +168 -0
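In short: this commit removes the durable GitHub interaction log (GithubLog, the PERSONABOT_WRITE_TOKEN/PERSONABOT_REPO settings, and the startup reconstruction of SQLite from that log) and replaces it with an asynchronous Grafana Loki observability sink. Interactions are still written to SQLite synchronously; a sanitised record (hashed session id and query, no answer text) is shipped to Loki fire-and-forget. A new source_hit_proxy retrieval-quality flag is computed, persisted to SQLite, and included in the Loki record.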
app/core/config.py CHANGED
@@ -55,14 +55,6 @@ class Settings(BaseSettings):
     GEMINI_MODEL: str = "gemini-2.5-flash-lite"
     GEMINI_CONTEXT_PATH: str = "backend/app/services/gemini_context.toon"
 
-    # Durable GitHub interaction log — survives HF Space restarts.
-    # PERSONABOT_WRITE_TOKEN: fine-grained PAT with read+write Contents access
-    # on the PersonaBot repo. When set, every interaction is appended to
-    # data/interactions.jsonl in the repo so training signals persist.
-    # Leave unset in local dev (interactions stay in SQLite only).
-    PERSONABOT_WRITE_TOKEN: Optional[str] = None
-    PERSONABOT_REPO: str = "1337Xcode/PersonaBot"
-
     # HuggingFace Space model servers.
     # In local env, embedder/reranker run in-process (these URLs are ignored).
     # In prod, the API Space calls the HF embedder/reranker Spaces via HTTP.
app/main.py CHANGED
@@ -23,7 +23,6 @@ from app.pipeline.graph import build_pipeline
 from app.security.rate_limiter import limiter, custom_rate_limit_handler
 from app.services.embedder import Embedder
 from app.services.gemini_client import GeminiClient
-from app.services.github_log import GithubLog
 from app.services.llm_client import get_llm_client, TpmBucket
 from app.services.reranker import Reranker
 from app.services.semantic_cache import SemanticCache
@@ -69,17 +68,6 @@ def _normalize_qdrant_url(url: str) -> str:
     return raw
 
 
-def _sqlite_row_count(db_path: str) -> int:
-    """Return the current interactions row count, or 0 if the table doesn't exist."""
-    try:
-        with sqlite3.connect(db_path) as conn:
-            return conn.execute("SELECT COUNT(*) FROM interactions").fetchone()[0]
-    except sqlite3.OperationalError:
-        return 0
-    except Exception:
-        return 0
-
-
 async def _qdrant_keepalive_loop(
     qdrant: QdrantClient,
     interval_seconds: int,
@@ -112,31 +100,13 @@ async def lifespan(app: FastAPI):
     settings = get_settings()
     logger.info("Starting PersonaBot API | env=%s", settings.ENVIRONMENT)
 
-    # Durable GitHub interaction log — survives HF Space restarts.
-    # When PERSONABOT_WRITE_TOKEN is not set (local dev), GithubLog.enabled=False
-    # and all append calls are silent no-ops.
-    github_log = GithubLog(
-        write_token=settings.PERSONABOT_WRITE_TOKEN or "",
-        repo=settings.PERSONABOT_REPO,
-    )
-    app.state.github_log = github_log
-
     # Attach the in-memory semantic cache. No external service required.
     app.state.semantic_cache = SemanticCache(
         max_size=settings.SEMANTIC_CACHE_SIZE,
         ttl_seconds=settings.SEMANTIC_CACHE_TTL_SECONDS,
         similarity_threshold=settings.SEMANTIC_CACHE_SIMILARITY_THRESHOLD,
     )
-    app.state.conversation_store = ConversationStore(settings.DB_PATH, github_log=github_log)
-
-    # Issue 1: reconstruct SQLite conversation history from the durable GitHub log
-    # after an ephemeral HF Space restart. Only triggers when SQLite is empty
-    # (<10 rows) so a healthy Space with accumulated data is never overwritten.
-    if github_log.enabled and _sqlite_row_count(settings.DB_PATH) < 10:
-        logger.info("SQLite appears empty — attempting reconstruction from durable log.")
-        recent = await github_log.load_recent(500)
-        if recent:
-            app.state.conversation_store.populate_from_records(recent)
+    app.state.conversation_store = ConversationStore(settings.DB_PATH)
 
     # DagsHub/MLflow experiment tracking — optional, only active when token is set.
     # In prod with DAGSHUB_TOKEN set, experiments are tracked at dagshub.com.
@@ -222,7 +192,6 @@ async def lifespan(app: FastAPI):
         "vector_store": vector_store,
         "reranker": reranker,
         "db_path": settings.DB_PATH,
-        "github_log": github_log,
     })
     app.state.settings = settings
     app.state.qdrant = qdrant
app/pipeline/graph.py CHANGED
@@ -121,7 +121,7 @@ def build_pipeline(services: dict) -> CompiledStateGraph:
     # CRAG: query rewrite on failed retrieval — runs up to twice for portfolio queries.
     graph.add_node("rewrite_query", make_rewrite_query_node(services["gemini"]))
     graph.add_node("generate", make_generate_node(services["llm"], services["gemini"]))
-    graph.add_node("log_eval", make_log_eval_node(services["db_path"], services["github_log"]))
+    graph.add_node("log_eval", make_log_eval_node(services["db_path"]))
 
     graph.set_entry_point("guard")
app/pipeline/nodes/log_eval.py CHANGED
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import logging
 import sqlite3
@@ -6,16 +7,23 @@ from datetime import datetime, timezone
 from typing import Callable
 
 from app.models.pipeline import PipelineState
-from app.services.github_log import GithubLog
+from app.services.loki_sink import ship_to_loki
 
 logger = logging.getLogger(__name__)
+_PENDING_TASKS: set[asyncio.Task[None]] = set()
 
 
-def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState], dict]:
+def _source_hit_proxy(state: PipelineState) -> int:
+    reranked_chunks = state.get("reranked_chunks", [])
+    chunk_count = len(reranked_chunks)
+    top_score = state.get("top_rerank_score")
+    return int(top_score is not None and top_score > -1.5 and chunk_count >= 2)
+
+
+def make_log_eval_node(db_path: str) -> Callable[[PipelineState], dict]:
     """
     Writes interaction to SQLite synchronously (<5ms) inside the request lifespan.
-    Appends the same record to the durable GitHub log
-    so training signals survive HuggingFace Space restarts.
+    Ships a sanitised observability record to Grafana Loki asynchronously.
 
     The `path` field tags which pipeline branch produced the answer:
     "cache_hit" — served from semantic cache, no LLM called.
@@ -50,6 +58,7 @@ def make_log_eval_node(db_path: str, github_log=None) -> Callable[[PipelineState], dict]:
              for c in state.get("reranked_chunks", [])]
         )
         path = state.get("path") or "rag"
+        source_hit_proxy = _source_hit_proxy(state)
 
         with sqlite3.connect(db_path) as conn:
             conn.execute(
@@ -71,7 +80,8 @@
                     critic_completeness INTEGER,
                     critic_specificity INTEGER,
                     critic_quality TEXT,
-                    is_enumeration_query BOOLEAN DEFAULT 0
+                    is_enumeration_query BOOLEAN DEFAULT 0,
+                    source_hit_proxy INTEGER DEFAULT 0
                 )
                 """
             )
@@ -92,6 +102,7 @@
                 # RC-13: retrieval diagnostics
                 ("sibling_expansion_count", "INTEGER"),
                 ("focused_source_type", "TEXT"),
+                ("source_hit_proxy", "INTEGER DEFAULT 0"),
             ]:
                 try:
                     conn.execute(f"ALTER TABLE interactions ADD COLUMN {col} {definition}")
@@ -104,8 +115,8 @@
                 (timestamp, session_id, query, answer, chunks_used, rerank_scores,
                  reranked_chunks_json, latency_ms, cached, path,
                  critic_groundedness, critic_completeness, critic_specificity, critic_quality,
-                 is_enumeration_query, sibling_expansion_count, focused_source_type)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                 is_enumeration_query, sibling_expansion_count, focused_source_type, source_hit_proxy)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                 """,
                 (
                     datetime.now(tz=timezone.utc).isoformat(),
@@ -125,49 +136,50 @@
                     state.get("is_enumeration_query", False),
                     state.get("sibling_expansion_count"),
                     state.get("focused_source_type"),
+                    source_hit_proxy,
                 ),
             )
             return cursor.lastrowid  # type: ignore[return-value]
 
-    async def log_eval_node(state: PipelineState) -> dict:
+    def _build_loki_record(state: PipelineState) -> dict:
+        return {
+            "timestamp": datetime.now(tz=timezone.utc).isoformat(),
+            "session_id": state.get("session_id", ""),
+            "query": state.get("query", ""),
+            "answer": state.get("answer", ""),
+            "path": state.get("path") or "rag",
+            "cached": state.get("cached", False),
+            "latency_ms": state.get("latency_ms", 0),
+            "retrieval_attempts": state.get("retrieval_attempts", 0),
+            "top_rerank_score": state.get("top_rerank_score"),
+            "focused_source_type": state.get("focused_source_type"),
+            "sibling_expansion_count": state.get("sibling_expansion_count"),
+            "critic_groundedness": state.get("critic_groundedness"),
+            "critic_completeness": state.get("critic_completeness"),
+            "critic_specificity": state.get("critic_specificity"),
+            "critic_quality": state.get("critic_quality"),
+            "is_enumeration_query": state.get("is_enumeration_query", False),
+            "guard_passed": state.get("guard_passed", False),
+            "query_complexity": state.get("query_complexity", ""),
+            "is_followup": state.get("is_followup", False),
+            "is_audio_mode": state.get("is_audio_mode", False),
+            "follow_ups": state.get("follow_ups", []),
+            "reranked_chunks": state.get("reranked_chunks", []),
+            "source_hit_proxy": _source_hit_proxy(state),
+        }
+
+    def log_eval_node(state: PipelineState) -> dict:
         try:
             row_id = _write_to_sqlite(state)
-            record = {
-                "timestamp": datetime.now(tz=timezone.utc).isoformat(),
-                "session_id": state.get("session_id", ""),
-                "query": state.get("query", ""),
-                "answer": state.get("answer", ""),
-                "chunks_used": json.loads(
-                    json.dumps([c["metadata"]["doc_id"] for c in state.get("reranked_chunks", [])])
-                ),
-                "reranked_chunks_json": [
-                    {
-                        "doc_id": c["metadata"].get("doc_id", ""),
-                        "source_title": c["metadata"].get("source_title", ""),
-                        "source_type": c["metadata"].get("source_type", ""),
-                        "section": c["metadata"].get("section", ""),
-                    }
-                    for c in state.get("reranked_chunks", [])
-                ],
-                "rerank_scores": [
-                    c["metadata"].get("rerank_score", 0.0)
-                    for c in state.get("reranked_chunks", [])
-                ],
-                "latency_ms": state.get("latency_ms", 0),
-                "cached": state.get("cached", False),
-                "feedback": 0,
-                "path": path,
-                "critic_groundedness": state.get("critic_groundedness"),
-                "critic_completeness": state.get("critic_completeness"),
-                "critic_specificity": state.get("critic_specificity"),
-                "critic_quality": state.get("critic_quality"),
-                "is_enumeration_query": state.get("is_enumeration_query", False),
-            }
-            github_log.append(record)
+            try:
+                loop = asyncio.get_running_loop()
+                task = loop.create_task(ship_to_loki(_build_loki_record(state)))
+                _PENDING_TASKS.add(task)
+                task.add_done_callback(_PENDING_TASKS.discard)
+            except RuntimeError:
+                # Called outside an event loop (for example, synchronous unit tests).
+                # SQLite write still succeeds; Loki shipping is skipped.
+                logger.debug("No running event loop; skipping async Loki ship for this call.")
 
             return {"interaction_id": row_id}
         except Exception as e:
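A note on the _PENDING_TASKS set in the new node: asyncio holds only weak references to running tasks, so a bare loop.create_task(...) can be garbage-collected before the coroutine finishes. Keeping each task in a module-level set until its done-callback discards it is the standard fix. A minimal standalone sketch of the same pattern (the names here are illustrative, not from the repo):

    import asyncio

    _pending: set[asyncio.Task[None]] = set()

    async def _ship(payload: dict) -> None:
        # Stand-in for ship_to_loki: any fire-and-forget coroutine works here.
        await asyncio.sleep(0)

    def fire_and_forget(payload: dict) -> None:
        task = asyncio.get_running_loop().create_task(_ship(payload))
        _pending.add(task)                        # strong reference keeps the task alive
        task.add_done_callback(_pending.discard)  # drop the reference once it completes

    async def main() -> None:
        fire_and_forget({"latency_ms": 42})
        await asyncio.gather(*_pending)           # tests can drain in-flight ships like this

    asyncio.run(main())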
app/services/conversation_store.py CHANGED
@@ -32,9 +32,8 @@ class ConversationStore:
     One instance is created at startup and shared across all requests via app.state.
     """
 
-    def __init__(self, db_path: str, github_log=None) -> None:
+    def __init__(self, db_path: str) -> None:
         self._db_path = db_path
-        self._github_log = github_log
         self._ensure_summary_table()
 
     def _ensure_summary_table(self) -> None:
@@ -136,79 +135,3 @@
         except Exception as exc:
             logger.warning("ConversationStore.mark_last_negative SQLite failed: %s", exc)
 
-        if self._github_log is not None:
-            self._github_log.append_feedback(session_id, feedback=-1)
-
-    def populate_from_records(self, records: list[dict]) -> None:
-        """
-        Replay interaction records from the durable GitHub log into SQLite.
-        Called at startup when SQLite is empty after a Space restart.
-        """
-        import os
-        db_dir = os.path.dirname(self._db_path)
-        if db_dir:
-            os.makedirs(db_dir, exist_ok=True)
-
-        interaction_records = [
-            r for r in records
-            if r.get("type") != "feedback" and r.get("query")
-        ]
-        if not interaction_records:
-            return
-
-        try:
-            with sqlite3.connect(self._db_path) as conn:
-                conn.execute(
-                    """
-                    CREATE TABLE IF NOT EXISTS interactions (
-                        id INTEGER PRIMARY KEY AUTOINCREMENT,
-                        timestamp TEXT,
-                        session_id TEXT,
-                        query TEXT,
-                        answer TEXT,
-                        chunks_used TEXT,
-                        rerank_scores TEXT,
-                        reranked_chunks_json TEXT,
-                        latency_ms INTEGER,
-                        cached BOOLEAN,
-                        feedback INTEGER DEFAULT 0,
-                        path TEXT DEFAULT 'rag'
-                    )
-                    """
-                )
-                feedback_corrections: dict[str, int] = {}
-                for r in records:
-                    if r.get("type") == "feedback":
-                        feedback_corrections[r["session_id"]] = r.get("feedback", 0)
-
-                for r in interaction_records:
-                    sid = r.get("session_id", "")
-                    feedback = feedback_corrections.get(sid, r.get("feedback", 0))
-                    conn.execute(
-                        """
-                        INSERT INTO interactions
-                        (timestamp, session_id, query, answer, chunks_used,
-                         rerank_scores, reranked_chunks_json, latency_ms, cached, feedback, path)
-                        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                        """,
-                        (
-                            r.get("timestamp", datetime.now(tz=timezone.utc).isoformat()),
-                            sid,
-                            r.get("query", ""),
-                            r.get("answer", ""),
-                            json.dumps(r.get("chunks_used", [])),
-                            json.dumps(r.get("rerank_scores", [])),
-                            json.dumps(r.get("reranked_chunks_json", [])),
-                            r.get("latency_ms", 0),
-                            r.get("cached", False),
-                            feedback,
-                            r.get("path", "rag"),
-                        ),
-                    )
-                logger.info(
-                    "Reconstructed %d interactions from durable GitHub log into SQLite.",
-                    len(interaction_records),
-                )
-        except Exception as exc:
-            logger.warning("ConversationStore.populate_from_records failed: %s", exc)
-
app/services/loki_sink.py ADDED
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import os
+import time
+from typing import Any
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+_LOKI_URL = os.environ.get("LOKI_URL", "").strip()
+_LOKI_USERNAME = os.environ.get("LOKI_USERNAME", "").strip()
+_LOKI_API_KEY = os.environ.get("LOKI_API_KEY", "").strip()
+
+_LOKI_ENABLED = bool(_LOKI_URL)
+if not _LOKI_ENABLED:
+    logger.warning("LOKI_URL is not set; Loki shipping is disabled.")
+
+
+def _sha_prefix(value: str, prefix_len: int) -> str:
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:prefix_len]
+
+
+def _extract_chunk_metrics(reranked_chunks: list[dict[str, Any]]) -> tuple[list[float], list[str], int, str]:
+    rerank_scores: list[float] = []
+    source_types: set[str] = set()
+    for chunk in reranked_chunks:
+        metadata = chunk.get("metadata", {})
+        score = metadata.get("rerank_score")
+        if isinstance(score, (int, float)):
+            rerank_scores.append(float(score))
+        source_type = str(metadata.get("source_type", "") or "")
+        if source_type:
+            source_types.add(source_type)
+
+    chunk_count = len(reranked_chunks)
+    top_chunk_doc_id = ""
+    if chunk_count > 0:
+        top_chunk_doc_id = str(reranked_chunks[0].get("metadata", {}).get("doc_id", "") or "")
+
+    return rerank_scores, sorted(source_types), chunk_count, top_chunk_doc_id
+
+
+def _to_float_or_none(value: Any) -> float | None:
+    if isinstance(value, (int, float)):
+        return float(value)
+    return None
+
+
+def _build_sanitized_record(record: dict[str, Any]) -> dict[str, Any]:
+    reranked_chunks = record.get("reranked_chunks") or []
+    query = str(record.get("query", ""))
+    session_id = str(record.get("session_id", ""))
+    rerank_scores, source_types_used, chunk_count, top_chunk_doc_id = _extract_chunk_metrics(reranked_chunks)
+
+    top_rerank_score = _to_float_or_none(record.get("top_rerank_score"))
+    source_hit_proxy = int(top_rerank_score is not None and top_rerank_score > -1.5 and chunk_count >= 2)
+
+    return {
+        "timestamp": str(record.get("timestamp", "")),
+        "session_id": _sha_prefix(session_id, 12) if session_id else "",
+        "path": str(record.get("path", "rag") or "rag"),
+        "cached": bool(record.get("cached", False)),
+        "latency_ms": int(record.get("latency_ms", 0) or 0),
+        "retrieval_attempts": int(record.get("retrieval_attempts", 0) or 0),
+        "top_rerank_score": top_rerank_score,
+        "focused_source_type": str(record.get("focused_source_type", "") or ""),
+        "sibling_expansion_count": int(record.get("sibling_expansion_count", 0) or 0),
+        "critic_groundedness": record.get("critic_groundedness"),
+        "critic_completeness": record.get("critic_completeness"),
+        "critic_specificity": record.get("critic_specificity"),
+        "critic_quality": str(record.get("critic_quality", "") or ""),
+        "is_enumeration_query": bool(record.get("is_enumeration_query", False)),
+        "guard_passed": bool(record.get("guard_passed", False)),
+        "query_complexity": str(record.get("query_complexity", "") or ""),
+        "is_followup": bool(record.get("is_followup", False)),
+        "is_audio_mode": bool(record.get("is_audio_mode", False)),
+        "query_hash": _sha_prefix(query, 16) if query else "",
+        "chunk_count": chunk_count,
+        "top_chunk_doc_id": top_chunk_doc_id,
+        "source_types_used": source_types_used,
+        "follow_up_count": len(record.get("follow_ups") or []),
+        "rerank_scores": rerank_scores,
+        "source_hit_proxy": source_hit_proxy,
+    }
+
+
+async def ship_to_loki(record: dict[str, Any]) -> None:
+    if not _LOKI_ENABLED:
+        return
+
+    try:
+        sanitized = _build_sanitized_record(record)
+        payload = {
+            "streams": [
+                {
+                    "stream": {
+                        "app": "personabot",
+                        "env": "production",
+                        "path": str(sanitized.get("path", "rag") or "rag"),
+                        "critic_quality": str(sanitized.get("critic_quality", "") or ""),
+                        "focused_source_type": str(sanitized.get("focused_source_type", "") or ""),
+                    },
+                    "values": [
+                        [str(time.time_ns()), json.dumps(sanitized)]
+                    ],
+                }
+            ]
+        }
+
+        timeout = httpx.Timeout(3.0)
+        async with httpx.AsyncClient(timeout=timeout, auth=(_LOKI_USERNAME, _LOKI_API_KEY)) as client:
+            response = await client.post(_LOKI_URL, json=payload)
+            response.raise_for_status()
    except Exception as exc:
        logger.warning("Loki ship failed: %s", exc)
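Because the sink reads LOKI_URL, LOKI_USERNAME, and LOKI_API_KEY at import time, they must be set before app.services.loki_sink is first imported. A minimal local smoke test, assuming placeholder Grafana Cloud credentials:

    # Set these in the shell before starting Python; loki_sink reads them at import time.
    #   export LOKI_URL="https://logs-prod-000.grafana.net/loki/api/v1/push"  # placeholder
    #   export LOKI_USERNAME="123456"                                         # placeholder
    #   export LOKI_API_KEY="glc_..."                                         # placeholder
    import asyncio

    from app.services.loki_sink import ship_to_loki

    record = {
        "timestamp": "2026-04-21T00:00:00Z",
        "session_id": "local-smoke",  # hashed to a 12-char SHA-256 prefix before shipping
        "query": "hello",             # only a 16-char hash leaves the process
        "answer": "hi",               # dropped entirely by the sanitiser
        "path": "rag",
        "latency_ms": 42,
        "reranked_chunks": [],
    }

    # Failures are swallowed and logged as warnings, so this never raises.
    asyncio.run(ship_to_loki(record))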
tests/test_log_eval_privacy.py CHANGED
@@ -6,8 +6,7 @@ import pytest
 from app.pipeline.nodes.log_eval import make_log_eval_node
 
 
-@pytest.mark.asyncio
-async def test_log_eval_stores_chunk_metadata_without_text(tmp_path) -> None:
+def test_log_eval_stores_chunk_metadata_without_text(tmp_path) -> None:
     db_path = str(tmp_path / "interactions.db")
     node = make_log_eval_node(db_path)
 
@@ -33,7 +32,7 @@ async def test_log_eval_stores_chunk_metadata_without_text(tmp_path) -> None:
         "is_enumeration_query": False,
     }
 
-    await node(state)
+    node(state)
 
     with sqlite3.connect(db_path) as conn:
         row = conn.execute("SELECT reranked_chunks_json FROM interactions LIMIT 1").fetchone()
tests/test_loki_sink.py ADDED
@@ -0,0 +1,168 @@
+import asyncio
+import json
+import sqlite3
+
+import httpx
+import pytest
+
+from app.pipeline.nodes.log_eval import make_log_eval_node
+from app.services import loki_sink
+
+
+class _FakeResponse:
+    def raise_for_status(self) -> None:
+        return None
+
+
+@pytest.mark.asyncio
+async def test_ship_to_loki_sanitises_pii(monkeypatch) -> None:
+    captured: dict = {}
+
+    class _FakeAsyncClient:
+        def __init__(self, *args, **kwargs):
+            # No-op test double constructor.
+            self._args = args
+            self._kwargs = kwargs
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return None
+
+        async def post(self, url, json):
+            await asyncio.sleep(0)
+            captured["url"] = url
+            captured["payload"] = json
+            return _FakeResponse()
+
+    monkeypatch.setattr(loki_sink, "_LOKI_ENABLED", True)
+    monkeypatch.setattr(loki_sink, "_LOKI_URL", "https://example.grafana.net/loki/api/v1/push")
+    monkeypatch.setattr(loki_sink, "_LOKI_USERNAME", "123")
+    monkeypatch.setattr(loki_sink, "_LOKI_API_KEY", "glc_test")
+    monkeypatch.setattr(loki_sink.httpx, "AsyncClient", _FakeAsyncClient)
+
+    await loki_sink.ship_to_loki(
+        {
+            "timestamp": "2026-04-21T00:00:00Z",
+            "session_id": "session-123",
+            "query": "my private query",
+            "answer": "long text",
+            "path": "rag",
+            "cached": False,
+            "latency_ms": 100,
+            "retrieval_attempts": 1,
+            "top_rerank_score": -1.0,
+            "focused_source_type": "resume",
+            "sibling_expansion_count": 0,
+            "critic_groundedness": 3,
+            "critic_completeness": 3,
+            "critic_specificity": 3,
+            "critic_quality": "high",
+            "is_enumeration_query": False,
+            "guard_passed": True,
+            "query_complexity": "simple",
+            "is_followup": False,
+            "is_audio_mode": False,
+            "follow_ups": [],
+            "reranked_chunks": [],
+        }
+    )
+
+    values = captured["payload"]["streams"][0]["values"]
+    assert values
+    serialized = values[0][1]
+    assert "my private query" not in serialized
+    assert "long text" not in serialized
+
+    payload_record = json.loads(serialized)
+    assert payload_record.get("query_hash")
+
+
+@pytest.mark.asyncio
+async def test_ship_to_loki_noop_when_no_url(monkeypatch) -> None:
+    called = {"post": False}
+
+    class _NeverCalledClient:
+        def __init__(self, *args, **kwargs):
+            # No-op test double constructor.
+            self._args = args
+            self._kwargs = kwargs
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return None
+
+        async def post(self, url, json):
+            await asyncio.sleep(0)
+            called["post"] = True
+            return _FakeResponse()
+
+    monkeypatch.setattr(loki_sink, "_LOKI_ENABLED", False)
+    monkeypatch.setattr(loki_sink.httpx, "AsyncClient", _NeverCalledClient)
+
+    await loki_sink.ship_to_loki({"query": "x", "answer": "y"})
+
+    assert called["post"] is False
+
+
+@pytest.mark.asyncio
+async def test_ship_to_loki_swallows_http_error(monkeypatch) -> None:
+    class _FailingClient:
+        def __init__(self, *args, **kwargs):
+            # No-op test double constructor.
+            self._args = args
+            self._kwargs = kwargs
+
+        async def __aenter__(self):
+            return self
+
+        async def __aexit__(self, exc_type, exc, tb):
+            return None
+
+        async def post(self, url, json):
+            await asyncio.sleep(0)
+            raise httpx.ConnectError("connection failed")
+
+    monkeypatch.setattr(loki_sink, "_LOKI_ENABLED", True)
+    monkeypatch.setattr(loki_sink, "_LOKI_URL", "https://example.grafana.net/loki/api/v1/push")
+    monkeypatch.setattr(loki_sink, "_LOKI_USERNAME", "123")
+    monkeypatch.setattr(loki_sink, "_LOKI_API_KEY", "glc_test")
+    monkeypatch.setattr(loki_sink.httpx, "AsyncClient", _FailingClient)
+
+    await loki_sink.ship_to_loki({"query": "q", "answer": "a", "path": "rag", "reranked_chunks": []})
+
+
+@pytest.mark.asyncio
+async def test_source_hit_proxy_logged_to_sqlite(tmp_path) -> None:
+    db_path = str(tmp_path / "interactions.db")
+    node = make_log_eval_node(db_path)
+
+    state = {
+        "session_id": "s1",
+        "query": "What work experience does Darshan have?",
+        "answer": "He worked at VK Live.",
+        "reranked_chunks": [
+            {"text": "a", "metadata": {"doc_id": "d1", "source_type": "resume", "rerank_score": -1.0}},
+            {"text": "b", "metadata": {"doc_id": "d2", "source_type": "resume", "rerank_score": -1.2}},
+            {"text": "c", "metadata": {"doc_id": "d3", "source_type": "resume", "rerank_score": -1.3}},
+        ],
+        "latency_ms": 123,
+        "cached": False,
+        "path": "rag",
+        "is_enumeration_query": False,
+        "top_rerank_score": -1.0,
+        "retrieval_attempts": 1,
+        "follow_ups": [],
+    }
+
+    result = node(state)
+    assert "interaction_id" in result
+
+    with sqlite3.connect(db_path) as conn:
+        row = conn.execute("SELECT source_hit_proxy FROM interactions LIMIT 1").fetchone()
+
+    assert row is not None
+    assert row[0] == 1