mbudisic committed on
Commit
e86ca95
·
1 Parent(s): 4df9c16

refactor: Update search_help function and improve logging

Browse files

- Simplified the signature of the `search_help` function by removing unnecessary parameters.
- Enhanced logging throughout the `search_help` function to provide better insights into its execution flow.
- Updated the `create_transcript_rag_chain` function to accept only `Configuration` type, improving type safety.
- Refactored datastore loading in the `graph` function to utilize async for better performance.

pstuts_rag/pstuts_rag/datastore.py CHANGED
@@ -152,7 +152,7 @@ class Datastore:
152
  except ValueError:
153
  self.qdrant_client.get_collection(self.collection_name)
154
  logging.info(f"Collection {self.collection_name} already exists.")
155
- self.reload = not (self.config.eva_reinitialize)
156
 
157
  # wrapper around the client
158
  self.vector_store = QdrantVectorStore(
 
152
  except ValueError:
153
  self.qdrant_client.get_collection(self.collection_name)
154
  logging.info(f"Collection {self.collection_name} already exists.")
155
+ self.reload = self.config.eva_reinitialize
156
 
157
  # wrapper around the client
158
  self.vector_store = QdrantVectorStore(
pstuts_rag/pstuts_rag/nodes.py CHANGED
@@ -151,9 +151,7 @@ def research(state: TutorialState, config: RunnableConfig):
151
  }
152
 
153
 
154
- async def search_help(
155
- state: TutorialState, config: RunnableConfig
156
- ) -> Command[Literal["search_help", "route_is_complete"]]:
157
  """Search Adobe Help documentation for relevant information.
158
 
159
  Args:
@@ -164,11 +162,16 @@ async def search_help(
164
  dict: Updated state with search results message and URL references
165
  """
166
 
167
- configurable = Configuration.from_runnable_config(config)
 
 
 
168
  cls = get_chat_api(configurable.llm_api)
169
  llm = cls(model=configurable.llm_tool_model, temperature=0)
170
  prompt = NODE_PROMPTS["search_summary"]
171
 
 
 
172
  adobe_help_search = TavilySearchResults(
173
  max_results=2,
174
  include_domains=["helpx.adobe.com"],
@@ -177,10 +180,13 @@ async def search_help(
177
  include_images=True,
178
  response_format="content_and_artifact", # Always returns artifacts
179
  )
 
 
180
  query = state["search_query"][-1]
181
 
182
  decision = state["search_permission"]
183
  if decision == YesNoAsk.ASK:
 
184
 
185
  response = interrupt(
186
  (
@@ -191,9 +197,7 @@ async def search_help(
191
 
192
  logging.info(f"Permission response '{response}'")
193
  decision = YesNoAsk.YES if "yes" in response.strip() else YesNoAsk.NO
194
- return Command(
195
- update={"search_permission": decision}, goto=search_help.__name__
196
- )
197
 
198
  response = {
199
  "messages": [],
@@ -207,18 +211,23 @@ async def search_help(
207
  longform = f"Query '{query}' is permitted."
208
  else:
209
  longform = f"Query '{query}' is NOT permitted."
 
210
 
211
  response["messages"].append({"role": "human", "content": longform})
212
 
213
  if decision == YesNoAsk.YES:
 
214
 
215
  results = await adobe_help_search.ainvoke(query)
216
 
 
 
217
  urls = list(r["url"] for r in results)
218
  tool = TavilyExtract(
219
  extract_depth="advanced",
220
  include_images=False,
221
  )
 
222
 
223
  results = await tool.ainvoke({"urls": urls})
224
 
@@ -235,12 +244,14 @@ async def search_help(
235
  query=query,
236
  text="\n***\n".join(all_text),
237
  )
 
238
 
239
  url_summary = await llm.ainvoke([HumanMessage(content=prompt)])
240
  response["messages"].append(url_summary)
241
  response["url_references"].extend(results["results"])
 
242
 
243
- return Command(update=response, goto=route_is_complete.__name__)
244
 
245
 
246
  async def search_rag(
@@ -256,7 +267,10 @@ async def search_rag(
256
  dict: Updated state with RAG response and video references
257
  """
258
 
259
- chain = create_transcript_rag_chain(datastore, config)
 
 
 
260
  query = state["search_query"][-1]
261
 
262
  response = await chain.ainvoke({"question": query})
@@ -519,7 +533,7 @@ def initialize(
519
  graph_builder.add_edge(init_state.__name__, route_is_relevant.__name__)
520
  graph_builder.add_edge(research.__name__, search_help.__name__)
521
  graph_builder.add_edge(research.__name__, search_rag.__name__)
522
- # graph_builder.add_edge(search_help.__name__, route_is_complete.__name__)
523
  graph_builder.add_edge(search_rag.__name__, route_is_complete.__name__)
524
 
525
  graph_builder.add_edge(write_answer.__name__, END)
@@ -567,8 +581,13 @@ async def graph(config: RunnableConfig = None):
567
 
568
  # Start datastore population as background task (non-blocking)
569
  if initialize_datastore:
570
- asyncio.create_task(
571
- _datastore.from_json_globs(Configuration().transcript_glob)
572
- )
 
 
 
 
 
573
 
574
  return _compiled_graph
 
151
  }
152
 
153
 
154
+ async def search_help(state: TutorialState, config: RunnableConfig):
 
 
155
  """Search Adobe Help documentation for relevant information.
156
 
157
  Args:
 
162
  dict: Updated state with search results message and URL references
163
  """
164
 
165
+ configurable = await asyncio.to_thread(
166
+ Configuration.from_runnable_config, config
167
+ )
168
+ logging.info("search_help: loaded config")
169
  cls = get_chat_api(configurable.llm_api)
170
  llm = cls(model=configurable.llm_tool_model, temperature=0)
171
  prompt = NODE_PROMPTS["search_summary"]
172
 
173
+ logging.info("search_help: configured llm")
174
+
175
  adobe_help_search = TavilySearchResults(
176
  max_results=2,
177
  include_domains=["helpx.adobe.com"],
 
180
  include_images=True,
181
  response_format="content_and_artifact", # Always returns artifacts
182
  )
183
+ logging.info("search_help: configured tavily")
184
+
185
  query = state["search_query"][-1]
186
 
187
  decision = state["search_permission"]
188
  if decision == YesNoAsk.ASK:
189
+ logging.info("search_help: asking permission")
190
 
191
  response = interrupt(
192
  (
 
197
 
198
  logging.info(f"Permission response '{response}'")
199
  decision = YesNoAsk.YES if "yes" in response.strip() else YesNoAsk.NO
200
+ return {"search_permission": decision}
 
 
201
 
202
  response = {
203
  "messages": [],
 
211
  longform = f"Query '{query}' is permitted."
212
  else:
213
  longform = f"Query '{query}' is NOT permitted."
214
+ logging.info("search_help: %s", longform)
215
 
216
  response["messages"].append({"role": "human", "content": longform})
217
 
218
  if decision == YesNoAsk.YES:
219
+ logging.info("search_help: searching")
220
 
221
  results = await adobe_help_search.ainvoke(query)
222
 
223
+ logging.info("search_help: results")
224
+
225
  urls = list(r["url"] for r in results)
226
  tool = TavilyExtract(
227
  extract_depth="advanced",
228
  include_images=False,
229
  )
230
+ logging.info("search_help: extract text")
231
 
232
  results = await tool.ainvoke({"urls": urls})
233
 
 
244
  query=query,
245
  text="\n***\n".join(all_text),
246
  )
247
+ logging.info("search_help: text extracted. summarizing.")
248
 
249
  url_summary = await llm.ainvoke([HumanMessage(content=prompt)])
250
  response["messages"].append(url_summary)
251
  response["url_references"].extend(results["results"])
252
+ logging.info("search_help: summary complete.")
253
 
254
+ return response
255
 
256
 
257
  async def search_rag(
 
267
  dict: Updated state with RAG response and video references
268
  """
269
 
270
+ configurable = await asyncio.to_thread(
271
+ Configuration.from_runnable_config, config
272
+ )
273
+ chain = create_transcript_rag_chain(datastore, configurable)
274
  query = state["search_query"][-1]
275
 
276
  response = await chain.ainvoke({"question": query})
 
533
  graph_builder.add_edge(init_state.__name__, route_is_relevant.__name__)
534
  graph_builder.add_edge(research.__name__, search_help.__name__)
535
  graph_builder.add_edge(research.__name__, search_rag.__name__)
536
+ graph_builder.add_edge(search_help.__name__, route_is_complete.__name__)
537
  graph_builder.add_edge(search_rag.__name__, route_is_complete.__name__)
538
 
539
  graph_builder.add_edge(write_answer.__name__, END)
 
581
 
582
  # Start datastore population as background task (non-blocking)
583
  if initialize_datastore:
584
+
585
+ async def load_datastore():
586
+ configurable = await asyncio.to_thread(Configuration)
587
+ await asyncio.to_thread(
588
+ _datastore.from_json_globs, configurable.transcript_glob
589
+ )
590
+
591
+ asyncio.create_task(load_datastore())
592
 
593
  return _compiled_graph
pstuts_rag/pstuts_rag/rag_for_transcripts.py CHANGED
@@ -107,7 +107,7 @@ def strip_think_tags(input: str) -> str:
107
 
108
  def create_transcript_rag_chain(
109
  datastore: Datastore,
110
- config: Union[RunnableConfig, Configuration] = Configuration(),
111
  ) -> Runnable:
112
  """Create a Retrieval-Augmented Generation (RAG) chain for video transcript search.
113
 
@@ -119,19 +119,15 @@ def create_transcript_rag_chain(
119
 
120
  Args:
121
  datastore: The DatastoreManager containing video transcript embeddings
122
- config: Configuration object or RunnableConfig with model and retrieval settings
123
 
124
  Returns:
125
  Runnable: A LangChain runnable that processes questions and returns
126
  answers with embedded references to source video segments
127
  """
128
 
129
- # Handle both Configuration objects and RunnableConfig dictionaries
130
- configurable = (
131
- config
132
- if isinstance(config, Configuration)
133
- else Configuration.from_runnable_config(config)
134
- )
135
 
136
  # Select the appropriate chat model class based on configuration
137
  cls = ChatAPISelector.get(configurable.llm_api, ChatOpenAI)
 
107
 
108
  def create_transcript_rag_chain(
109
  datastore: Datastore,
110
+ config: Configuration = Configuration(),
111
  ) -> Runnable:
112
  """Create a Retrieval-Augmented Generation (RAG) chain for video transcript search.
113
 
 
119
 
120
  Args:
121
  datastore: The DatastoreManager containing video transcript embeddings
122
+ config: Configuration object with model and retrieval settings
123
 
124
  Returns:
125
  Runnable: A LangChain runnable that processes questions and returns
126
  answers with embedded references to source video segments
127
  """
128
 
129
+ # Use the Configuration object directly
130
+ configurable = config
 
 
 
 
131
 
132
  # Select the appropriate chat model class based on configuration
133
  cls = ChatAPISelector.get(configurable.llm_api, ChatOpenAI)