Spaces:

anindya-hf-2002
/

Adaptive-RAG

Sleeping

App Files Files Community

Adaptive-RAG / main.py

anindya-hf-2002

Upload 21 files

b6d19d9 verified 12 months ago

raw

history blame contribute delete

3.66 kB

	from pinecone import Pinecone
	from langchain_openai import ChatOpenAI
	from src.vectorstore.pinecone_db import ingest_data, get_retriever, load_documents, process_chunks, save_to_parquet
	from src.agents.workflow import run_adaptive_rag
	from langgraph.pregel import GraphRecursionError
	import tempfile
	import os
	from pathlib import Path

	def initialize_pinecone(api_key):
	    """Build a Pinecone client from an API key.

	    Args:
	        api_key: Key forwarded to ``Pinecone(api_key=...)``.

	    Returns:
	        The constructed client, or ``None`` if construction raised; in that
	        case the error is printed to stdout rather than propagated.
	    """
	    client = None
	    try:
	        client = Pinecone(api_key=api_key)
	    except Exception as e:
	        # Best-effort: callers test the result for falsiness instead of
	        # catching exceptions.
	        print(f"Error initializing Pinecone: {str(e)}")
	    return client

	def initialize_llm(api_key, model="gpt-3.5-turbo"):
	    """Initialize the OpenAI chat model.

	    Args:
	        api_key: Key forwarded to ``ChatOpenAI(api_key=...)``.
	        model: Model name forwarded to ``ChatOpenAI(model=...)``. Defaults to
	            ``"gpt-3.5-turbo"``, the value that was previously hard-coded,
	            so existing single-argument callers behave as before.

	    Returns:
	        The ``ChatOpenAI`` instance, or ``None`` if construction raised; the
	        error is printed to stdout rather than propagated.
	    """
	    try:
	        return ChatOpenAI(api_key=api_key, model=model)
	    except Exception as e:
	        # Best-effort: callers test the result for falsiness.
	        print(f"Error initializing OpenAI: {e}")
	        return None

	def process_documents(file_paths, pc):
	    """Process documents and store them in Pinecone.

	    Runs the pipeline ``load_documents`` -> ``process_chunks`` ->
	    ``save_to_parquet`` -> ``ingest_data`` using a scratch temporary
	    directory, then builds a retriever with ``get_retriever(pc)``.

	    Args:
	        file_paths: Document paths to ingest. A falsy value (``None``, empty
	            list) short-circuits with a message and no work is done.
	        pc: Pinecone client handed to ``ingest_data`` and ``get_retriever``.

	    Returns:
	        The retriever on success; ``None`` when no paths were given or when
	        any pipeline step raised (the error is printed, not propagated).
	    """
	    # Local import keeps this block self-contained; only cleanup needs it.
	    import shutil

	    if not file_paths:
	        print("No documents provided.")
	        return None

	    print("Processing documents...")
	    temp_dir = tempfile.mkdtemp()
	    markdown_path = Path(temp_dir) / "combined.md"
	    parquet_path = Path(temp_dir) / "documents.parquet"

	    try:
	        # The helpers return the path they actually wrote, which is what the
	        # next stage consumes.
	        markdown_path = load_documents(file_paths, output_path=markdown_path)
	        chunks = process_chunks(markdown_path, chunk_size=256, threshold=0.6)
	        parquet_path = save_to_parquet(chunks, parquet_path)

	        # NOTE(review): the client is passed under both `pc` and
	        # `pinecone_client`; kept as-is because ingest_data's signature is
	        # not visible here -- confirm whether one of them is redundant.
	        ingest_data(
	            pc=pc,
	            parquet_path=parquet_path,
	            text_column="text",
	            pinecone_client=pc,
	        )

	        retriever = get_retriever(pc)
	        print("Documents processed successfully!")
	        return retriever

	    except Exception as e:
	        print(f"Error processing documents: {e}")
	        return None
	    finally:
	        # Remove the whole scratch directory in one best-effort step. The
	        # previous chained os.remove/os.rmdir under a bare `except` stopped
	        # at the first failure (e.g. combined.md never created), leaking the
	        # directory and any remaining files, and also swallowed
	        # KeyboardInterrupt/SystemExit.
	        # NOTE(review): this assumes the helpers write inside temp_dir (the
	        # paths passed to them); verify if they can return other locations.
	        shutil.rmtree(temp_dir, ignore_errors=True)

	def main():
	    """Run the interactive command-line session.

	    Prompts for the Pinecone and OpenAI API keys, reads document paths from
	    stdin, ingests them with ``process_documents`` and then answers
	    questions with ``run_adaptive_rag`` until the user types ``exit`` (or
	    stdin is closed / Ctrl-C is pressed at the prompt).

	    Returns:
	        None. Returns early when either client cannot be created or when
	        document processing yields no retriever.
	    """
	    # Get API keys. Whitespace is stripped because keys are usually pasted
	    # and a stray space or newline would otherwise be sent as part of the key.
	    # NOTE(review): input() echoes the keys to the terminal. getpass.getpass
	    # would hide them, but it may read from the controlling tty instead of
	    # stdin, which would change behaviour for piped input -- not swapped in.
	    pinecone_api_key = input("Enter your Pinecone API key: ").strip()
	    openai_api_key = input("Enter your OpenAI API key: ").strip()

	    # Initialize clients; each initializer prints its own error and returns
	    # None on failure.
	    pc = initialize_pinecone(pinecone_api_key)
	    if not pc:
	        return

	    llm = initialize_llm(openai_api_key)
	    if not llm:
	        return

	    # Get document paths
	    print("\nEnter the paths to your documents (one per line).")
	    print("Press Enter twice when done:")

	    file_paths = []
	    while True:
	        try:
	            path = input().strip()
	        except EOFError:
	            # Closed stdin means "no more paths", same as a blank line.
	            break
	        if not path:
	            break
	        if os.path.exists(path):
	            file_paths.append(path)
	        else:
	            print(f"Warning: File {path} does not exist")

	    # Process documents
	    retriever = process_documents(file_paths, pc)
	    if not retriever:
	        return

	    # Chat loop
	    print("\nChat with your documents! Type 'exit' to quit.")
	    while True:
	        try:
	            question = input("\nYou: ").strip()
	        except (EOFError, KeyboardInterrupt):
	            # Leave cleanly instead of dumping a traceback at the prompt.
	            print("\nGoodbye!")
	            break

	        if not question:
	            # Nothing to ask; don't spend a pipeline run on a blank line.
	            continue

	        if question.lower() == 'exit':
	            print("Goodbye!")
	            break

	        try:
	            response = run_adaptive_rag(
	                retriever=retriever,
	                question=question,
	                llm=llm,
	                top_k=5,
	                enable_websearch=False,
	            )
	            print("\nAssistant:", response)

	        except GraphRecursionError:
	            # Raised by langgraph when the workflow hits its recursion limit;
	            # reported to the user as "no sufficient answer".
	            print("\nAssistant: I cannot find a sufficient answer to your question in the provided documents. Please try rephrasing your question or ask something else about the content of the documents.")

	        except Exception as e:
	            # Keep the session alive on any other per-question failure.
	            print(f"\nError: {e}")

	# Start the interactive session only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()