Spaces:
Sleeping
Sleeping
refactor: Update search_help function and improve logging
Browse files
- Simplified the signature of the `search_help` function by removing the `Command[...]` return annotation; the node now returns a plain state-update dict instead of a `Command`.
- Enhanced logging throughout the `search_help` function to provide better insights into its execution flow.
- Updated the `create_transcript_rag_chain` function to accept only `Configuration` type, improving type safety.
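- Refactored datastore loading in the `graph` function to run as a background `asyncio` task (via `asyncio.to_thread`), so that populating the datastore does not block returning the compiled graph.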
pstuts_rag/pstuts_rag/datastore.py
CHANGED
|
@@ -152,7 +152,7 @@ class Datastore:
|
|
| 152 |
except ValueError:
|
| 153 |
self.qdrant_client.get_collection(self.collection_name)
|
| 154 |
logging.info(f"Collection {self.collection_name} already exists.")
|
| 155 |
-
self.reload =
|
| 156 |
|
| 157 |
# wrapper around the client
|
| 158 |
self.vector_store = QdrantVectorStore(
|
|
|
|
| 152 |
except ValueError:
|
| 153 |
self.qdrant_client.get_collection(self.collection_name)
|
| 154 |
logging.info(f"Collection {self.collection_name} already exists.")
|
| 155 |
+
self.reload = self.config.eva_reinitialize
|
| 156 |
|
| 157 |
# wrapper around the client
|
| 158 |
self.vector_store = QdrantVectorStore(
|
pstuts_rag/pstuts_rag/nodes.py
CHANGED
|
@@ -151,9 +151,7 @@ def research(state: TutorialState, config: RunnableConfig):
|
|
| 151 |
}
|
| 152 |
|
| 153 |
|
| 154 |
-
async def search_help(
|
| 155 |
-
state: TutorialState, config: RunnableConfig
|
| 156 |
-
) -> Command[Literal["search_help", "route_is_complete"]]:
|
| 157 |
"""Search Adobe Help documentation for relevant information.
|
| 158 |
|
| 159 |
Args:
|
|
@@ -164,11 +162,16 @@ async def search_help(
|
|
| 164 |
dict: Updated state with search results message and URL references
|
| 165 |
"""
|
| 166 |
|
| 167 |
-
configurable =
|
|
|
|
|
|
|
|
|
|
| 168 |
cls = get_chat_api(configurable.llm_api)
|
| 169 |
llm = cls(model=configurable.llm_tool_model, temperature=0)
|
| 170 |
prompt = NODE_PROMPTS["search_summary"]
|
| 171 |
|
|
|
|
|
|
|
| 172 |
adobe_help_search = TavilySearchResults(
|
| 173 |
max_results=2,
|
| 174 |
include_domains=["helpx.adobe.com"],
|
|
@@ -177,10 +180,13 @@ async def search_help(
|
|
| 177 |
include_images=True,
|
| 178 |
response_format="content_and_artifact", # Always returns artifacts
|
| 179 |
)
|
|
|
|
|
|
|
| 180 |
query = state["search_query"][-1]
|
| 181 |
|
| 182 |
decision = state["search_permission"]
|
| 183 |
if decision == YesNoAsk.ASK:
|
|
|
|
| 184 |
|
| 185 |
response = interrupt(
|
| 186 |
(
|
|
@@ -191,9 +197,7 @@ async def search_help(
|
|
| 191 |
|
| 192 |
logging.info(f"Permission response '{response}'")
|
| 193 |
decision = YesNoAsk.YES if "yes" in response.strip() else YesNoAsk.NO
|
| 194 |
-
return
|
| 195 |
-
update={"search_permission": decision}, goto=search_help.__name__
|
| 196 |
-
)
|
| 197 |
|
| 198 |
response = {
|
| 199 |
"messages": [],
|
|
@@ -207,18 +211,23 @@ async def search_help(
|
|
| 207 |
longform = f"Query '{query}' is permitted."
|
| 208 |
else:
|
| 209 |
longform = f"Query '{query}' is NOT permitted."
|
|
|
|
| 210 |
|
| 211 |
response["messages"].append({"role": "human", "content": longform})
|
| 212 |
|
| 213 |
if decision == YesNoAsk.YES:
|
|
|
|
| 214 |
|
| 215 |
results = await adobe_help_search.ainvoke(query)
|
| 216 |
|
|
|
|
|
|
|
| 217 |
urls = list(r["url"] for r in results)
|
| 218 |
tool = TavilyExtract(
|
| 219 |
extract_depth="advanced",
|
| 220 |
include_images=False,
|
| 221 |
)
|
|
|
|
| 222 |
|
| 223 |
results = await tool.ainvoke({"urls": urls})
|
| 224 |
|
|
@@ -235,12 +244,14 @@ async def search_help(
|
|
| 235 |
query=query,
|
| 236 |
text="\n***\n".join(all_text),
|
| 237 |
)
|
|
|
|
| 238 |
|
| 239 |
url_summary = await llm.ainvoke([HumanMessage(content=prompt)])
|
| 240 |
response["messages"].append(url_summary)
|
| 241 |
response["url_references"].extend(results["results"])
|
|
|
|
| 242 |
|
| 243 |
-
return
|
| 244 |
|
| 245 |
|
| 246 |
async def search_rag(
|
|
@@ -256,7 +267,10 @@ async def search_rag(
|
|
| 256 |
dict: Updated state with RAG response and video references
|
| 257 |
"""
|
| 258 |
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
| 260 |
query = state["search_query"][-1]
|
| 261 |
|
| 262 |
response = await chain.ainvoke({"question": query})
|
|
@@ -519,7 +533,7 @@ def initialize(
|
|
| 519 |
graph_builder.add_edge(init_state.__name__, route_is_relevant.__name__)
|
| 520 |
graph_builder.add_edge(research.__name__, search_help.__name__)
|
| 521 |
graph_builder.add_edge(research.__name__, search_rag.__name__)
|
| 522 |
-
|
| 523 |
graph_builder.add_edge(search_rag.__name__, route_is_complete.__name__)
|
| 524 |
|
| 525 |
graph_builder.add_edge(write_answer.__name__, END)
|
|
@@ -567,8 +581,13 @@ async def graph(config: RunnableConfig = None):
|
|
| 567 |
|
| 568 |
# Start datastore population as background task (non-blocking)
|
| 569 |
if initialize_datastore:
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
return _compiled_graph
|
|
|
|
| 151 |
}
|
| 152 |
|
| 153 |
|
| 154 |
+
async def search_help(state: TutorialState, config: RunnableConfig):
|
|
|
|
|
|
|
| 155 |
"""Search Adobe Help documentation for relevant information.
|
| 156 |
|
| 157 |
Args:
|
|
|
|
| 162 |
dict: Updated state with search results message and URL references
|
| 163 |
"""
|
| 164 |
|
| 165 |
+
configurable = await asyncio.to_thread(
|
| 166 |
+
Configuration.from_runnable_config, config
|
| 167 |
+
)
|
| 168 |
+
logging.info("search_help: loaded config")
|
| 169 |
cls = get_chat_api(configurable.llm_api)
|
| 170 |
llm = cls(model=configurable.llm_tool_model, temperature=0)
|
| 171 |
prompt = NODE_PROMPTS["search_summary"]
|
| 172 |
|
| 173 |
+
logging.info("search_help: configured llm")
|
| 174 |
+
|
| 175 |
adobe_help_search = TavilySearchResults(
|
| 176 |
max_results=2,
|
| 177 |
include_domains=["helpx.adobe.com"],
|
|
|
|
| 180 |
include_images=True,
|
| 181 |
response_format="content_and_artifact", # Always returns artifacts
|
| 182 |
)
|
| 183 |
+
logging.info("search_help: configured tavily")
|
| 184 |
+
|
| 185 |
query = state["search_query"][-1]
|
| 186 |
|
| 187 |
decision = state["search_permission"]
|
| 188 |
if decision == YesNoAsk.ASK:
|
| 189 |
+
logging.info("search_help: asking permission")
|
| 190 |
|
| 191 |
response = interrupt(
|
| 192 |
(
|
|
|
|
| 197 |
|
| 198 |
logging.info(f"Permission response '{response}'")
|
| 199 |
decision = YesNoAsk.YES if "yes" in response.strip() else YesNoAsk.NO
|
| 200 |
+
return {"search_permission": decision}
|
|
|
|
|
|
|
| 201 |
|
| 202 |
response = {
|
| 203 |
"messages": [],
|
|
|
|
| 211 |
longform = f"Query '{query}' is permitted."
|
| 212 |
else:
|
| 213 |
longform = f"Query '{query}' is NOT permitted."
|
| 214 |
+
logging.info("search_help: %s", longform)
|
| 215 |
|
| 216 |
response["messages"].append({"role": "human", "content": longform})
|
| 217 |
|
| 218 |
if decision == YesNoAsk.YES:
|
| 219 |
+
logging.info("search_help: searching")
|
| 220 |
|
| 221 |
results = await adobe_help_search.ainvoke(query)
|
| 222 |
|
| 223 |
+
logging.info("search_help: results")
|
| 224 |
+
|
| 225 |
urls = list(r["url"] for r in results)
|
| 226 |
tool = TavilyExtract(
|
| 227 |
extract_depth="advanced",
|
| 228 |
include_images=False,
|
| 229 |
)
|
| 230 |
+
logging.info("search_help: extract text")
|
| 231 |
|
| 232 |
results = await tool.ainvoke({"urls": urls})
|
| 233 |
|
|
|
|
| 244 |
query=query,
|
| 245 |
text="\n***\n".join(all_text),
|
| 246 |
)
|
| 247 |
+
logging.info("search_help: text extracted. summarizing.")
|
| 248 |
|
| 249 |
url_summary = await llm.ainvoke([HumanMessage(content=prompt)])
|
| 250 |
response["messages"].append(url_summary)
|
| 251 |
response["url_references"].extend(results["results"])
|
| 252 |
+
logging.info("search_help: summary complete.")
|
| 253 |
|
| 254 |
+
return response
|
| 255 |
|
| 256 |
|
| 257 |
async def search_rag(
|
|
|
|
| 267 |
dict: Updated state with RAG response and video references
|
| 268 |
"""
|
| 269 |
|
| 270 |
+
configurable = await asyncio.to_thread(
|
| 271 |
+
Configuration.from_runnable_config, config
|
| 272 |
+
)
|
| 273 |
+
chain = create_transcript_rag_chain(datastore, configurable)
|
| 274 |
query = state["search_query"][-1]
|
| 275 |
|
| 276 |
response = await chain.ainvoke({"question": query})
|
|
|
|
| 533 |
graph_builder.add_edge(init_state.__name__, route_is_relevant.__name__)
|
| 534 |
graph_builder.add_edge(research.__name__, search_help.__name__)
|
| 535 |
graph_builder.add_edge(research.__name__, search_rag.__name__)
|
| 536 |
+
graph_builder.add_edge(search_help.__name__, route_is_complete.__name__)
|
| 537 |
graph_builder.add_edge(search_rag.__name__, route_is_complete.__name__)
|
| 538 |
|
| 539 |
graph_builder.add_edge(write_answer.__name__, END)
|
|
|
|
| 581 |
|
| 582 |
# Start datastore population as background task (non-blocking)
|
| 583 |
if initialize_datastore:
|
| 584 |
+
|
| 585 |
+
async def load_datastore():
|
| 586 |
+
configurable = await asyncio.to_thread(Configuration)
|
| 587 |
+
await asyncio.to_thread(
|
| 588 |
+
_datastore.from_json_globs, configurable.transcript_glob
|
| 589 |
+
)
|
| 590 |
+
|
| 591 |
+
asyncio.create_task(load_datastore())
|
| 592 |
|
| 593 |
return _compiled_graph
|
pstuts_rag/pstuts_rag/rag_for_transcripts.py
CHANGED
|
@@ -107,7 +107,7 @@ def strip_think_tags(input: str) -> str:
|
|
| 107 |
|
| 108 |
def create_transcript_rag_chain(
|
| 109 |
datastore: Datastore,
|
| 110 |
-
config:
|
| 111 |
) -> Runnable:
|
| 112 |
"""Create a Retrieval-Augmented Generation (RAG) chain for video transcript search.
|
| 113 |
|
|
@@ -119,19 +119,15 @@ def create_transcript_rag_chain(
|
|
| 119 |
|
| 120 |
Args:
|
| 121 |
datastore: The DatastoreManager containing video transcript embeddings
|
| 122 |
-
config: Configuration object
|
| 123 |
|
| 124 |
Returns:
|
| 125 |
Runnable: A LangChain runnable that processes questions and returns
|
| 126 |
answers with embedded references to source video segments
|
| 127 |
"""
|
| 128 |
|
| 129 |
-
#
|
| 130 |
-
configurable =
|
| 131 |
-
config
|
| 132 |
-
if isinstance(config, Configuration)
|
| 133 |
-
else Configuration.from_runnable_config(config)
|
| 134 |
-
)
|
| 135 |
|
| 136 |
# Select the appropriate chat model class based on configuration
|
| 137 |
cls = ChatAPISelector.get(configurable.llm_api, ChatOpenAI)
|
|
|
|
| 107 |
|
| 108 |
def create_transcript_rag_chain(
|
| 109 |
datastore: Datastore,
|
| 110 |
+
config: Configuration = Configuration(),
|
| 111 |
) -> Runnable:
|
| 112 |
"""Create a Retrieval-Augmented Generation (RAG) chain for video transcript search.
|
| 113 |
|
|
|
|
| 119 |
|
| 120 |
Args:
|
| 121 |
datastore: The DatastoreManager containing video transcript embeddings
|
| 122 |
+
config: Configuration object with model and retrieval settings
|
| 123 |
|
| 124 |
Returns:
|
| 125 |
Runnable: A LangChain runnable that processes questions and returns
|
| 126 |
answers with embedded references to source video segments
|
| 127 |
"""
|
| 128 |
|
| 129 |
+
# Use the Configuration object directly
|
| 130 |
+
configurable = config
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
# Select the appropriate chat model class based on configuration
|
| 133 |
cls = ChatAPISelector.get(configurable.llm_api, ChatOpenAI)
|