{
"cells": [
{
"cell_type": "markdown",
"id": "d7ddecba",
"metadata": {},
"source": [
"# π Notebook 03: Evaluate RAGAS Performance\n",
"\n",
"This notebook evaluates your RAG pipeline using the RAGAS metrics against a Qdrant vector store."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a70a9c8c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/mwalker/development/TAMARKDesigns/AI-Maker-Space/cohort-6/projects/session-05/AIE6-Golf-Agent/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"# β
1. Setup & Imports\n",
"import os\n",
"import json\n",
"import sys\n",
"from tqdm import tqdm\n",
"from qdrant_client import QdrantClient\n",
"from qdrant_client.http import models as rest\n",
"from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, answer_correctness\n",
"from ragas import evaluate\n",
"from openai import OpenAI\n",
"from sentence_transformers import SentenceTransformer\n",
"import pandas as pd\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"# Load environment variables\n",
"load_dotenv(dotenv_path=\"../backend/.env\")\n",
"\n",
"# Get Qdrant environment variables\n",
"COLLECTION_NAME = os.getenv(\"QDRANT_COLLECTION_NAME\", \"golf_shot_vectors\")\n",
"EMBEDDING_MODEL = os.getenv(\"EMBEDDING_MODEL\", \"thenlper/gte-small\")\n",
"QDRANT_API_KEY = os.getenv(\"QDRANT_API_KEY\")\n",
"\n",
"# Create client for the Qdrant vector store.\n",
"client = QdrantClient(\n",
" url='https://6f592f43-f667-4234-ad3a-4f15ed5882ef.us-west-2-0.aws.cloud.qdrant.io:6333',\n",
" api_key=QDRANT_API_KEY\n",
")\n",
"\n",
"MODEL_INSTANCE = SentenceTransformer(EMBEDDING_MODEL)\n",
"\n",
"# Add the backend directory to the Python path\n",
"sys.path.append(\"../backend\")\n"
]
},
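{
"cell_type": "code",
"execution_count": null,
"id": "b2c4e6a8",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check: the embedding model's output dimension should match the\n",
"# Qdrant collection's vector size (assumes a single, unnamed vector config; adjust\n",
"# the attribute access if your collection uses named vectors).\n",
"collection_info = client.get_collection(COLLECTION_NAME)\n",
"expected_dim = collection_info.config.params.vectors.size\n",
"model_dim = MODEL_INSTANCE.get_sentence_embedding_dimension()\n",
"print(f\"Collection dim: {expected_dim} | Model dim: {model_dim}\")\n",
"assert expected_dim == model_dim, \"Embedding model does not match the collection's vector size\"\n"
]
},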
{
"cell_type": "code",
"execution_count": 2,
"id": "bf7d4dfc",
"metadata": {},
"outputs": [],
"source": [
"# β
2. Load Ground Truth Dataset\n",
"with open('../data/raw/golden_shot_dataset.json') as f:\n",
" dataset = json.load(f)"
]
},
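{
"cell_type": "code",
"execution_count": null,
"id": "c9d1f3a5",
"metadata": {},
"outputs": [],
"source": [
"# Quick look at the golden dataset: each entry is expected to provide a 'query' and an\n",
"# 'ideal_answer' field, since those are the keys read in the evaluation loop below.\n",
"print(f\"Loaded {len(dataset)} evaluation entries\")\n",
"dataset[0]"
]
},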
{
"cell_type": "code",
"execution_count": 3,
"id": "7e95f80e",
"metadata": {},
"outputs": [],
"source": [
"# β
3. Function to Query Qdrant and Retrieve Context\n",
"\n",
"\n",
"from tools.golf_shot_recommendations_tool import preprocess_query_with_llm\n",
"from qdrant_client.http.exceptions import UnexpectedResponse\n",
"\n",
"def get_embedding(text):\n",
" model = MODEL_INSTANCE\n",
" if EMBEDDING_MODEL.startswith(\"intfloat\"): # \"e5\" model type\n",
" return model.encode(f\"query: {text}\")\n",
" else:\n",
" return model.encode(text)\n",
"\n",
"\n",
"def get_contexts(question, top_k=3):\n",
" # Preprocess the query using LLM\n",
" preprocessed_query = preprocess_query_with_llm(question)\n",
" \n",
" # Get embeddings for the preprocessed query\n",
" model = MODEL_INSTANCE\n",
" query_vector = get_embedding(preprocessed_query)\n",
" \n",
" # Search Qdrant\n",
" try:\n",
" results = client.query_points(\n",
" collection_name=COLLECTION_NAME,\n",
" query=query_vector,\n",
" limit=top_k,\n",
" with_payload=True\n",
" )\n",
" except UnexpectedResponse as e:\n",
" if \"Vector dimension error\" in str(e):\n",
" raise ValueError(\n",
" f\"Vector dimension mismatch! The current embedding model ({EMBEDDING_MODEL}) \"\n",
" f\"produces vectors of a different dimension than what's expected by the Qdrant collection. \"\n",
" f\"Please check your EMBEDDING_MODEL environment variable and ensure it matches the model \"\n",
" f\"used to create the vectors in your Qdrant collection.\"\n",
" ) from e\n",
" raise # Re-raise other UnexpectedResponse errors\n",
" \n",
" # Format the results\n",
" recommendations = []\n",
" for point in results.points:\n",
" recommendations.append(f\"Score: {point.score:.4f} | {point.payload['text']}\")\n",
" \n",
" return recommendations\n"
]
},
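{
"cell_type": "code",
"execution_count": null,
"id": "d7e9a1b3",
"metadata": {},
"outputs": [],
"source": [
"# Smoke-test retrieval on a single made-up question before running the whole dataset,\n",
"# to confirm the collection, embedding model, and query preprocessing line up.\n",
"sample_question = \"What club should I hit for a 150-yard approach into the wind?\"\n",
"for ctx in get_contexts(sample_question):\n",
"    print(ctx)"
]
},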
{
"cell_type": "code",
"execution_count": 4,
"id": "34df4b1f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[DEBUG] Adding node: search_golfpedia with RunnableLambda(run)\n",
"[DEBUG] Adding node: course_insights with RunnableLambda(run)\n",
"[DEBUG] Adding node: get_pro_stats with RunnableLambda(run)\n",
"[DEBUG] Adding node: get_shot_recommendations with RunnableLambda(run)\n",
"[DEBUG] All nodes in workflow: ['router', 'search_golfpedia', 'course_insights', 'get_pro_stats', 'get_shot_recommendations', 'summarize']\n",
"[DEBUG] Node 'router' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=router(tags=None, recurse=True, explode_args=False, func_accepts_config=False, func_accepts={}), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'search_golfpedia' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'course_insights' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'get_pro_stats' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'get_shot_recommendations' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(run), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n",
"[DEBUG] Node 'summarize' is of type <class 'langgraph.graph.state.StateNodeSpec'> and value: StateNodeSpec(runnable=RunnableLambda(summarize_result), metadata=None, input=<class 'agents.golf_langgraph.AgentState'>, retry_policy=None, ends=())\n"
]
}
],
"source": [
"# β
4. Generate Answers Using Your RAG Pipeline\n",
"from agents.golf_langgraph import summarize_result\n",
"\n",
"def generate_answer(question, contexts):\n",
" context_str = '\\n'.join(contexts)\n",
" state = {\n",
" \"input\": question,\n",
" \"tool_result\": context_str\n",
" }\n",
" \n",
" # Use the summarize_result function to generate the answer\n",
" answer = summarize_result(state)\n",
" return answer.get(\"final_response\")\n"
]
},
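{
"cell_type": "code",
"execution_count": null,
"id": "e5f7c9d1",
"metadata": {},
"outputs": [],
"source": [
"# End-to-end check on one golden entry: retrieve contexts, then summarize them into an\n",
"# answer with the same pipeline the evaluation loop uses below.\n",
"sample_entry = dataset[0]\n",
"sample_ctx = get_contexts(sample_entry['query'])\n",
"print(generate_answer(sample_entry['query'], sample_ctx))"
]
},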
{
"cell_type": "code",
"execution_count": 5,
"id": "f89ecff0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 0/40 [00:00<?, ?it/s]/Users/mwalker/development/TAMARKDesigns/AI-Maker-Space/cohort-6/projects/session-05/AIE6-Golf-Agent/notebooks/../backend/agents/golf_langgraph.py:21: LangChainDeprecationWarning: The class `ChatOpenAI` was deprecated in LangChain 0.0.10 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-openai package and should be used instead. To use it run `pip install -U :class:`~langchain-openai` and import as `from :class:`~langchain_openai import ChatOpenAI``.\n",
" return ChatOpenAI(\n",
"100%|ββββββββββ| 40/40 [04:55<00:00, 7.39s/it]\n"
]
}
],
"source": [
"# β
5. Run Evaluation on Dataset\n",
"records = []\n",
"\n",
"for entry in tqdm(dataset):\n",
" q = entry['query']\n",
" gt = entry['ideal_answer']\n",
" ctx = get_contexts(q)\n",
" ans = generate_answer(q, ctx)\n",
" \n",
" records.append({\n",
" \"user_input\": q,\n",
" \"retrieved_contexts\": ctx,\n",
" \"response\": ans,\n",
" \"reference\": gt\n",
" })"
]
},
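{
"cell_type": "code",
"execution_count": null,
"id": "f1a3b5c7",
"metadata": {},
"outputs": [],
"source": [
"# The generation pass above takes ~5 minutes, so checkpoint the records to disk before\n",
"# scoring. The output path is arbitrary; point it wherever you keep intermediate files.\n",
"pd.DataFrame(records).to_json(\"../data/processed/ragas_eval_records.json\", orient=\"records\", indent=2)"
]
},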
{
"cell_type": "code",
"execution_count": 7,
"id": "4b7a0b6e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating: 100%|ββββββββββ| 200/200 [03:18<00:00, 1.01it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'faithfulness': 0.4304, 'answer_relevancy': 0.0436, 'context_precision': 0.4458, 'context_recall': 0.4125, 'answer_correctness': 0.3816}\n"
]
}
],
"source": [
"# β
6. Evaluate with RAGAS\n",
"# df = pd.DataFrame(records)\n",
"\n",
"# Convert your records to the required schema\n",
"ragas_records = [\n",
" {\n",
" \"user_input\": r[\"user_input\"],\n",
" \"retrieved_contexts\": r[\"retrieved_contexts\"],\n",
" \"response\": r[\"response\"],\n",
" \"reference\": r[\"reference\"],\n",
" }\n",
" for r in records\n",
"]\n",
"\n",
"from ragas.evaluation import EvaluationDataset\n",
"\n",
"dataset = EvaluationDataset.from_list(ragas_records)\n",
"\n",
"ragas_results = evaluate(\n",
" dataset,\n",
" metrics=[faithfulness, answer_relevancy, context_precision, context_recall, answer_correctness]\n",
")\n",
"print(ragas_results)"
]
}
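,
{
"cell_type": "code",
"execution_count": null,
"id": "a9b1c3d5",
"metadata": {},
"outputs": [],
"source": [
"# Per-sample scores make it easier to see which questions drag the averages down\n",
"# (answer_relevancy is especially low here). Recent ragas versions expose to_pandas();\n",
"# if yours does not, inspect ragas_results.scores instead.\n",
"scores_df = ragas_results.to_pandas()\n",
"scores_df.sort_values(\"answer_relevancy\").head(10)"
]
}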
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}