# Spaces: Sleeping
import os
import shutil
import tempfile
from pathlib import Path

from langchain_openai import ChatOpenAI
from langgraph.pregel import GraphRecursionError
from pinecone import Pinecone

from src.agents.workflow import run_adaptive_rag
from src.vectorstore.pinecone_db import (
    get_retriever,
    ingest_data,
    load_documents,
    process_chunks,
    save_to_parquet,
)
def initialize_pinecone(api_key):
    """Create a Pinecone client for the given API key.

    Args:
        api_key: Pinecone API key forwarded to the ``Pinecone`` constructor.

    Returns:
        The constructed ``Pinecone`` client, or ``None`` when construction
        raised; in that case the error is printed to stdout.
    """
    client = None
    try:
        client = Pinecone(api_key=api_key)
    except Exception as exc:
        # Report the failure and fall through with ``client`` still None so
        # the caller can detect it with a simple truthiness check.
        print(f"Error initializing Pinecone: {exc!s}")
    return client
def initialize_llm(api_key, model="gpt-3.5-turbo"):
    """Initialize the OpenAI chat model.

    Args:
        api_key: OpenAI API key forwarded to ``ChatOpenAI``.
        model: Name of the chat model to use. Defaults to
            ``"gpt-3.5-turbo"``, the value that used to be hard-coded, so
            existing single-argument callers are unaffected.

    Returns:
        The constructed ``ChatOpenAI`` instance, or ``None`` when
        construction raised; in that case the error is printed to stdout.
    """
    try:
        return ChatOpenAI(api_key=api_key, model=model)
    except Exception as e:
        print(f"Error initializing OpenAI: {str(e)}")
        return None
def process_documents(file_paths, pc):
    """Process documents and store them in Pinecone.

    Runs the ingestion pipeline in order: ``load_documents`` (writing to
    ``combined.md`` in a fresh temporary directory), ``process_chunks``,
    ``save_to_parquet``, ``ingest_data`` and finally ``get_retriever``.
    The temporary directory and the intermediate files are removed before
    returning, whether or not the pipeline succeeded.

    Args:
        file_paths: Paths of the documents to ingest. An empty or ``None``
            value short-circuits with a message.
        pc: Pinecone client handed to ``ingest_data`` and ``get_retriever``.

    Returns:
        The retriever returned by ``get_retriever(pc)``, or ``None`` when no
        documents were provided or any pipeline step raised (the error is
        printed to stdout).
    """
    if not file_paths:
        print("No documents provided.")
        return None

    print("Processing documents...")
    temp_dir = tempfile.mkdtemp()
    markdown_path = Path(temp_dir) / "combined.md"
    parquet_path = Path(temp_dir) / "documents.parquet"

    try:
        # The helpers return the path they actually wrote; keep tracking it
        # so the cleanup below removes that file.
        markdown_path = load_documents(file_paths, output_path=markdown_path)
        chunks = process_chunks(markdown_path, chunk_size=256, threshold=0.6)
        parquet_path = save_to_parquet(chunks, parquet_path)

        # NOTE(review): the client is passed as both ``pc`` and
        # ``pinecone_client``; confirm against ingest_data's signature
        # whether both keywords are really expected.
        ingest_data(
            pc=pc,
            parquet_path=parquet_path,
            text_column="text",
            pinecone_client=pc,
        )

        retriever = get_retriever(pc)
        print("Documents processed successfully!")
        return retriever
    except Exception as e:
        print(f"Error processing documents: {str(e)}")
        return None
    finally:
        # Best-effort cleanup. Each artifact is removed independently so a
        # file that was never created (or a helper that returned None) does
        # not prevent the remaining artifacts and the directory itself from
        # being removed.
        for artifact in (markdown_path, parquet_path):
            try:
                os.remove(artifact)
            except (OSError, TypeError):
                pass
        shutil.rmtree(temp_dir, ignore_errors=True)
def main():
    """Run the interactive command-line chat over user-supplied documents.

    Prompts for the Pinecone and OpenAI API keys, reads document paths from
    stdin until a blank line (or end of input), ingests the existing paths
    via ``process_documents`` and then answers questions with
    ``run_adaptive_rag`` until the user types ``exit`` or closes the input
    stream. Returns ``None`` early if a client cannot be initialized or no
    retriever could be built.
    """
    # Get API keys. Surrounding whitespace (common when pasting) is dropped
    # because it is never part of a valid key.
    pinecone_api_key = input("Enter your Pinecone API key: ").strip()
    openai_api_key = input("Enter your OpenAI API key: ").strip()

    # Initialize clients
    pc = initialize_pinecone(pinecone_api_key)
    if not pc:
        return

    llm = initialize_llm(openai_api_key)
    if not llm:
        return

    # Get document paths
    print("\nEnter the paths to your documents (one per line).")
    print("Press Enter twice when done:")
    file_paths = []
    while True:
        try:
            path = input().strip()
        except EOFError:
            # End of input (e.g. Ctrl-D or a piped file) ends the list the
            # same way a blank line does.
            break
        if not path:
            break
        if os.path.exists(path):
            file_paths.append(path)
        else:
            print(f"Warning: File {path} does not exist")

    # Process documents
    retriever = process_documents(file_paths, pc)
    if not retriever:
        return

    # Chat loop
    print("\nChat with your documents! Type 'exit' to quit.")
    while True:
        try:
            question = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Leave cleanly instead of dumping a traceback when the user
            # closes stdin or presses Ctrl-C at the prompt.
            print("\nGoodbye!")
            break

        if not question:
            # Nothing to ask; do not spend a model call on a blank line.
            continue
        if question.lower() == "exit":
            print("Goodbye!")
            break

        try:
            response = run_adaptive_rag(
                retriever=retriever,
                question=question,
                llm=llm,
                top_k=5,
                enable_websearch=False,
            )
            print("\nAssistant:", response)
        except GraphRecursionError:
            # The workflow raised GraphRecursionError instead of returning
            # an answer; show a friendly message rather than the raw error.
            print("\nAssistant: I cannot find a sufficient answer to your question in the provided documents. Please try rephrasing your question or ask something else about the content of the documents.")
        except Exception as e:
            print(f"\nError: {str(e)}")
# Start the interactive session only when this file is executed as a script;
# importing it as a module must not trigger any prompts.
if __name__ == "__main__":
    main()