File size: 8,762 Bytes
af85e91
 
 
 
 
 
 
 
 
 
 
 
 
 
a018353
af85e91
 
a018353
 
 
 
 
 
 
 
 
af85e91
 
 
 
 
a018353
8825f6e
 
 
 
 
a018353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af85e91
 
 
9681c5d
 
af85e91
 
9681c5d
 
 
 
 
 
af85e91
 
 
 
8825f6e
9681c5d
af85e91
9681c5d
 
 
 
 
a018353
9681c5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af85e91
8825f6e
9681c5d
a018353
af85e91
 
 
 
 
9681c5d
af85e91
a018353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9681c5d
 
 
 
a018353
9681c5d
 
 
 
 
 
 
af85e91
a018353
af85e91
 
 
 
a018353
9681c5d
af85e91
 
 
a018353
af85e91
 
 
 
 
 
 
 
 
 
 
 
 
 
a018353
af85e91
 
9681c5d
 
 
 
 
 
 
 
a018353
 
 
9681c5d
 
 
a018353
 
 
9681c5d
 
 
a018353
 
 
9681c5d
 
 
a018353
 
 
9681c5d
 
 
af85e91
 
9681c5d
af85e91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a018353
9681c5d
af85e91
 
 
9681c5d
af85e91
 
 
 
 
 
 
 
 
 
9681c5d
 
 
 
 
 
af85e91
9681c5d
 
af85e91
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b1a955e7",
   "metadata": {},
   "source": [
    "# Update Blog Data\n",
    "\n",
    "This notebook demonstrates how to update the blog data and vector store when new blog posts are published. It uses the utility functions from the `lets_talk.utils.blog` module."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6ec048b4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Adding package root to sys.path: /home/mafzaal/source/lets-talk/py-src\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "# Add the parent of the current working directory to the import path so that\n",
    "# project packages (e.g. `lets_talk`, imported later in this notebook) resolve.\n",
    "# NOTE: this relies on the kernel's working directory being the notebook folder.\n",
    "package_root = os.path.abspath(os.path.join(os.getcwd(), \"../\"))\n",
    "print(f\"Adding package root to sys.path: {package_root}\")\n",
    "# Guard against appending the same entry again when the cell is re-run\n",
    "if package_root not in sys.path:\n",
    "    sys.path.append(package_root)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "7a7a9f3f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Current notebook directory: /home/mafzaal/source/lets-talk/py-src/notebooks\n",
      "Project root: /home/mafzaal/source/lets-talk\n"
     ]
    }
   ],
   "source": [
    "# Capture the notebook directory only once per kernel session: this cell changes\n",
    "# the working directory below, so re-running it must not recompute the path\n",
    "# from the already-changed cwd (which would climb two more levels each time).\n",
    "if \"notebook_dir\" not in globals():\n",
    "    notebook_dir = os.getcwd()\n",
    "print(f\"Current notebook directory: {notebook_dir}\")\n",
    "# Change the working directory to the root of the project (two levels up)\n",
    "project_root = os.path.abspath(os.path.join(notebook_dir, \"../../\"))\n",
    "print(f\"Project root: {project_root}\")\n",
    "os.chdir(project_root)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cc19ab4c",
   "metadata": {},
   "source": [
    "## Update Blog Data Process\n",
    "\n",
    "This process will:\n",
    "\n",
    "1. Load existing blog posts\n",
    "2. Process and update metadata\n",
    "3. Split the documents into chunks\n",
    "4. Create or update vector embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d56f688",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 14/14 [00:00<00:00, 4617.46it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 14 documents from data/\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Load the blog posts and update their metadata in a single step\n",
    "from lets_talk.utils import blog as blog_utils\n",
    "\n",
    "docs = blog_utils.update_document_metadata(blog_utils.load_blog_posts())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a14c70dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Split 14 documents into 162 chunks\n"
     ]
    }
   ],
   "source": [
    "split_docs = blog_utils.split_documents(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1c40c587",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(metadata={'source': 'data/introduction-to-ragas/index.md', 'url': 'https://thedataguy.pro/blog/introduction-to-ragas/', 'post_slug': 'introduction-to-ragas', 'post_title': '\"Part 1: Introduction to Ragas: The Essential Evaluation Framework for LLM Applications\"', 'content_length': 6994}, page_content='---\\ntitle: \"Part 1: Introduction to Ragas: The Essential Evaluation Framework for LLM Applications\"\\ndate: 2025-04-26T18:00:00-06:00\\nlayout: blog\\ndescription: \"Explore the essential evaluation framework for LLM applications with Ragas. Learn how to assess performance, ensure accuracy, and improve reliability in Retrieval-Augmented Generation systems.\"\\ncategories: [\"AI\", \"RAG\", \"Evaluation\",\"Ragas\"]\\ncoverImage: \"https://images.unsplash.com/photo-1593642634367-d91a135587b5?q=80&w=1770&auto=format&fit=crop&ixlib=rb-4.0.3\"\\nreadingTime: 7\\npublished: true\\n---\\n\\nAs Large Language Models (LLMs) become fundamental components of modern applications, effectively evaluating their performance becomes increasingly critical. Whether you\\'re building a question-answering system, a document retrieval tool, or a conversational agent, you need reliable metrics to assess how well your application performs. This is where Ragas steps in.\\n\\n## What is Ragas?')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "split_docs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "72dd14b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bind only `vector_store` here: `blog_utils` must keep referring to the helper\n",
    "# module so that earlier cells (load / split) can still be re-run afterwards.\n",
    "vector_store = blog_utils.create_vector_store(split_docs, './db/vector_store_5')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ad3b2dca",
   "metadata": {},
   "source": [
    "## Testing the Vector Store\n",
    "\n",
    "Let's test the vector store with a few queries to make sure it's working correctly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8b552e6b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Query: What is RAGAS?\n",
      "Retrieved 3 documents:\n",
      "1. \"Part 3: Evaluating RAG Systems with Ragas\" (https://thedataguy.pro/blog/evaluating-rag-systems-with-ragas/)\n",
      "2. \"Part 1: Introduction to Ragas: The Essential Evaluation Framework for LLM Applications\" (https://thedataguy.pro/blog/introduction-to-ragas/)\n",
      "3. \"Part 4: Generating Test Data with Ragas\" (https://thedataguy.pro/blog/generating-test-data-with-ragas/)\n",
      "\n",
      "Query: How to build research agents?\n",
      "Retrieved 3 documents:\n",
      "1. Building a Research Agent with RSS Feed Support (https://thedataguy.pro/blog/building-research-agent/)\n",
      "2. \"Part 1: Introduction to Ragas: The Essential Evaluation Framework for LLM Applications\" (https://thedataguy.pro/blog/introduction-to-ragas/)\n",
      "3. Building a Research Agent with RSS Feed Support (https://thedataguy.pro/blog/building-research-agent/)\n",
      "\n",
      "Query: What is metric driven development?\n",
      "Retrieved 3 documents:\n",
      "1. \"Metric-Driven Development: Make Smarter Decisions, Faster\" (https://thedataguy.pro/blog/metric-driven-development/)\n",
      "2. \"Metric-Driven Development: Make Smarter Decisions, Faster\" (https://thedataguy.pro/blog/metric-driven-development/)\n",
      "3. \"Part 5: Advanced Metrics and Customization with Ragas\" (https://thedataguy.pro/blog/advanced-metrics-and-customization-with-ragas/)\n",
      "\n",
      "Query: Who is TheDataGuy?\n",
      "Retrieved 3 documents:\n",
      "1. \"Part 2: Basic Evaluation Workflow with Ragas\" (https://thedataguy.pro/blog/basic-evaluation-workflow-with-ragas/)\n",
      "2. \"Part 2: Basic Evaluation Workflow with Ragas\" (https://thedataguy.pro/blog/basic-evaluation-workflow-with-ragas/)\n",
      "3. \"Part 6: Evaluating AI Agents: Beyond Simple Answers with Ragas\" (https://thedataguy.pro/blog/evaluating-ai-agents-with-ragas/)\n"
     ]
    }
   ],
   "source": [
    "# Create a retriever from the vector store, requesting 3 results per query\n",
    "retriever = vector_store.as_retriever(search_kwargs={\"k\": 3})\n",
    "\n",
    "# Test queries\n",
    "test_queries = [\n",
    "    \"What is RAGAS?\",\n",
    "    \"How to build research agents?\",\n",
    "    \"What is metric driven development?\",\n",
    "    \"Who is TheDataGuy?\",\n",
    "]\n",
    "\n",
    "for query in test_queries:\n",
    "    print(f\"\\nQuery: {query}\")\n",
    "    # Use a dedicated name so the `docs` loaded earlier in the notebook is not\n",
    "    # overwritten by query results (keeps the split cell safe to re-run).\n",
    "    retrieved_docs = retriever.invoke(query)\n",
    "    print(f\"Retrieved {len(retrieved_docs)} documents:\")\n",
    "    for rank, doc in enumerate(retrieved_docs, start=1):\n",
    "        # Fall back to placeholders when a chunk lacks title/url metadata\n",
    "        title = doc.metadata.get(\"post_title\", \"Unknown\")\n",
    "        url = doc.metadata.get(\"url\", \"No URL\")\n",
    "        print(f\"{rank}. {title} ({url})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "4cdd6899",
   "metadata": {},
   "outputs": [],
   "source": [
    "vector_store.client.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}