{
"cells": [
{
"cell_type": "markdown",
"id": "d7ddecba",
"metadata": {},
"source": [
"# π Notebook 03: Evaluate RAGAS Performance\n",
"\n",
"This notebook evaluates your RAG pipeline using the RAGAS metrics against a Qdrant vector store."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a70a9c8c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/mwalker/development/TAMARKDesigns/AI-Maker-Space/cohort-6/projects/session-05/AIE6-Golf-Agent/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# β
1. Setup & Imports\n",
"import os\n",
"import json\n",
"import sys\n",
"from tqdm import tqdm\n",
"from qdrant_client import QdrantClient\n",
"from qdrant_client.http import models as rest\n",
"from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, answer_correctness\n",
"from ragas import evaluate\n",
"from openai import OpenAI\n",
"from sentence_transformers import SentenceTransformer\n",
"import pandas as pd\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"# Load environment variables\n",
"load_dotenv(dotenv_path=\"../backend/.env\")\n",
"\n",
"# Get Qdrant environment variables\n",
"COLLECTION_NAME = os.getenv(\"QDRANT_COLLECTION_NAME\", \"golf_shot_vectors\")\n",
"EMBEDDING_MODEL = os.getenv(\"EMBEDDING_MODEL\", \"thenlper/gte-small\")\n",
"QDRANT_API_KEY = os.getenv(\"QDRANT_API_KEY\")\n",
"\n",
"# Create client for the Qdrant vector store.\n",
"client = QdrantClient(\n",
" url='https://6f592f43-f667-4234-ad3a-4f15ed5882ef.us-west-2-0.aws.cloud.qdrant.io:6333',\n",
" api_key=QDRANT_API_KEY\n",
")\n",
"\n",
"MODEL_INSTANCE = SentenceTransformer(EMBEDDING_MODEL)\n",
"\n",
"# Add the backend directory to the Python path\n",
"sys.path.append(\"../backend\")\n"
]
},
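{
"cell_type": "code",
"execution_count": null,
"id": "b2c4e6a8",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check: the embedding model's output dimension should match the\n",
"# Qdrant collection's vector size (assumes a single, unnamed vector config; adjust\n",
"# the attribute access if your collection uses named vectors).\n",
"collection_info = client.get_collection(COLLECTION_NAME)\n",
"expected_dim = collection_info.config.params.vectors.size\n",
"model_dim = MODEL_INSTANCE.get_sentence_embedding_dimension()\n",
"print(f\"Collection dim: {expected_dim} | Model dim: {model_dim}\")\n",
"assert expected_dim == model_dim, \"Embedding model does not match the collection's vector size\"\n"
]
},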
{
"cell_type": "code",
"execution_count": 2,
"id": "bf7d4dfc",
"metadata": {},
"outputs": [],
"source": [
"# β
2. Load Ground Truth Dataset\n",
"with open('../data/raw/golden_shot_dataset.json') as f:\n",
" dataset = json.load(f)"
]
},
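{
"cell_type": "code",
"execution_count": null,
"id": "c9d1f3a5",
"metadata": {},
"outputs": [],
"source": [
"# Quick look at the golden dataset: each entry is expected to provide a 'query' and an\n",
"# 'ideal_answer' field, since those are the keys read in the evaluation loop below.\n",
"print(f\"Loaded {len(dataset)} evaluation entries\")\n",
"dataset[0]"
]
},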
{
"cell_type": "code",
"execution_count": 3,
"id": "7e95f80e",
"metadata": {},
"outputs": [],
"source": [
"# β
3. Function to Query Qdrant and Retrieve Context\n",
"\n",
"\n",
"from tools.golf_shot_recommendations_tool import preprocess_query_with_llm\n",
"from qdrant_client.http.exceptions import UnexpectedResponse\n",
"\n",
"def get_embedding(text):\n",
" model = MODEL_INSTANCE\n",
" if EMBEDDING_MODEL.startswith(\"intfloat\"): # \"e5\" model type\n",
" return model.encode(f\"query: {text}\")\n",
" else:\n",
" return model.encode(text)\n",
"\n",
"\n",
"def get_contexts(question, top_k=3):\n",
" # Preprocess the query using LLM\n",
" preprocessed_query = preprocess_query_with_llm(question)\n",
" \n",
" # Get embeddings for the preprocessed query\n",
" model = MODEL_INSTANCE\n",
" query_vector = get_embedding(preprocessed_query)\n",
" \n",
" # Search Qdrant\n",
" try:\n",
" results = client.query_points(\n",
" collection_name=COLLECTION_NAME,\n",
" query=query_vector,\n",
" limit=top_k,\n",
" with_payload=True\n",
" )\n",
" except UnexpectedResponse as e:\n",
" if \"Vector dimension error\" in str(e):\n",
" raise ValueError(\n",
" f\"Vector dimension mismatch! The current embedding model ({EMBEDDING_MODEL}) \"\n",
" f\"produces vectors of a different dimension than what's expected by the Qdrant collection. \"\n",
" f\"Please check your EMBEDDING_MODEL environment variable and ensure it matches the model \"\n",
" f\"used to create the vectors in your Qdrant collection.\"\n",
" ) from e\n",
" raise # Re-raise other UnexpectedResponse errors\n",
" \n",
" # Format the results\n",
" recommendations = []\n",
" for point in results.points:\n",
" recommendations.append(f\"Score: {point.score:.4f} | {point.payload['text']}\")\n",
" \n",
" return recommendations\n"
]
},
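{
"cell_type": "code",
"execution_count": null,
"id": "d7e9a1b3",
"metadata": {},
"outputs": [],
"source": [
"# Smoke-test retrieval on a single made-up question before running the whole dataset,\n",
"# to confirm the collection, embedding model, and query preprocessing line up.\n",
"sample_question = \"What club should I hit for a 150-yard approach into the wind?\"\n",
"for ctx in get_contexts(sample_question):\n",
"    print(ctx)"
]
},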
{
"cell_type": "code",
"execution_count": 4,
"id": "34df4b1f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[DEBUG] Adding node: search_golfpedia with RunnableLambda(run)\n",
"[DEBUG] Adding node: course_insights with RunnableLambda(run)\n",
"[DEBUG] Adding node: get_pro_stats with RunnableLambda(run)\n",
"[DEBUG] Adding node: get_shot_recommendations with RunnableLambda(run)\n",
"[DEBUG] All nodes in workflow: ['router', 'search_golfpedia', 'course_insights', 'get_pro_stats', 'get_shot_recommendations', 'summarize']\n",
"[DEBUG] Node 'router' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=router(tags=None, recurse=True, explode_args=False, func_accepts_config=False, func_accepts={}), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'search_golfpedia' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'course_insights' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'get_pro_stats' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'get_shot_recommendations' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'summarize' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(summarize_result), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n"
]
}
],
"source": [
"# β
4. Generate Answers Using Your RAG Pipeline\n",
"from agents.golf_langgraph import summarize_result\n",
"\n",
"def generate_answer(question, contexts):\n",
" context_str = '\\n'.join(contexts)\n",
" state = {\n",
" \"input\": question,\n",
" \"tool_result\": context_str\n",
" }\n",
" \n",
" # Use the summarize_result function to generate the answer\n",
" answer = summarize_result(state)\n",
" return answer.get(\"final_response\")\n"
]
},
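{
"cell_type": "code",
"execution_count": null,
"id": "e5f7c9d1",
"metadata": {},
"outputs": [],
"source": [
"# End-to-end check on one golden entry: retrieve contexts, then summarize them into an\n",
"# answer with the same pipeline the evaluation loop uses below.\n",
"sample_entry = dataset[0]\n",
"sample_ctx = get_contexts(sample_entry['query'])\n",
"print(generate_answer(sample_entry['query'], sample_ctx))"
]
},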
{
"cell_type": "code",
"execution_count": 5,
"id": "f89ecff0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/40 [00:00<?, ?it/s]/Users/mwalker/development/TAMARKDesigns/AI-Maker-Space/cohort-6/projects/session-05/AIE6-Golf-Agent/notebooks/../backend/agents/golf_langgraph.py:21: LangChainDeprecationWarning: The class `ChatOpenAI` was deprecated in LangChain 0.0.10 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import ChatOpenAI``.\n",
" return ChatOpenAI(\n",
"100%|ββββββββββ| 40/40 [04:55<00:00, 7.39s/it]\n"
]
}
],
"source": [
"# β
5. Run Evaluation on Dataset\n",
"records = []\n",
"\n",
"for entry in tqdm(dataset):\n",
" q = entry['query']\n",
" gt = entry['ideal_answer']\n",
" ctx = get_contexts(q)\n",
" ans = generate_answer(q, ctx)\n",
" \n",
" records.append({\n",
" \"user_input\": q,\n",
" \"retrieved_contexts\": ctx,\n",
" \"response\": ans,\n",
" \"reference\": gt\n",
" })"
]
},
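{
"cell_type": "code",
"execution_count": null,
"id": "f1a3b5c7",
"metadata": {},
"outputs": [],
"source": [
"# The generation pass above takes ~5 minutes, so checkpoint the records to disk before\n",
"# scoring. The output path is arbitrary; point it wherever you keep intermediate files.\n",
"pd.DataFrame(records).to_json(\"../data/processed/ragas_eval_records.json\", orient=\"records\", indent=2)"
]
},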
{
"cell_type": "code",
"execution_count": 7,
"id": "4b7a0b6e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating: 100%|ββββββββββ| 200/200 [03:18<00:00, 1.01it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'faithfulness': 0.4304, 'answer_relevancy': 0.0436, 'context_precision': 0.4458, 'context_recall': 0.4125, 'answer_correctness': 0.3816}\n"
]
}
],
"source": [
"# β
6. Evaluate with RAGAS\n",
"# df = pd.DataFrame(records)\n",
"\n",
"# Convert your records to the required schema\n",
"ragas_records = [\n",
" {\n",
" \"user_input\": r[\"user_input\"],\n",
" \"retrieved_contexts\": r[\"retrieved_contexts\"],\n",
" \"response\": r[\"response\"],\n",
" \"reference\": r[\"reference\"],\n",
" }\n",
" for r in records\n",
"]\n",
"\n",
"from ragas.evaluation import EvaluationDataset\n",
"\n",
"dataset = EvaluationDataset.from_list(ragas_records)\n",
"\n",
"ragas_results = evaluate(\n",
" dataset,\n",
" metrics=[faithfulness, answer_relevancy, context_precision, context_recall, answer_correctness]\n",
")\n",
"print(ragas_results)"
]
}
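,
{
"cell_type": "code",
"execution_count": null,
"id": "a9b1c3d5",
"metadata": {},
"outputs": [],
"source": [
"# Per-sample scores make it easier to see which questions drag the averages down\n",
"# (answer_relevancy is especially low here). Recent ragas versions expose to_pandas();\n",
"# if yours does not, inspect ragas_results.scores instead.\n",
"scores_df = ragas_results.to_pandas()\n",
"scores_df.sort_values(\"answer_relevancy\").head(10)"
]
}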
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}