"""Command-line adaptive-RAG chat over user documents (Pinecone + OpenAI)."""
import os
import shutil
import tempfile
from pathlib import Path

from langchain_openai import ChatOpenAI
from langgraph.pregel import GraphRecursionError
from pinecone import Pinecone

from src.agents.workflow import run_adaptive_rag
from src.vectorstore.pinecone_db import (
    get_retriever,
    ingest_data,
    load_documents,
    process_chunks,
    save_to_parquet,
)
def initialize_pinecone(api_key):
    """Build a Pinecone client from *api_key*; return None (and print) on failure."""
    try:
        client = Pinecone(api_key=api_key)
    except Exception as e:
        print(f"Error initializing Pinecone: {str(e)}")
        return None
    return client
def initialize_llm(api_key):
    """Build a ChatOpenAI LLM (gpt-3.5-turbo); return None (and print) on failure."""
    try:
        llm = ChatOpenAI(api_key=api_key, model="gpt-3.5-turbo")
    except Exception as e:
        print(f"Error initializing OpenAI: {str(e)}")
        return None
    return llm
def process_documents(file_paths, pc):
    """Load, chunk, and ingest documents into Pinecone; return a retriever.

    Args:
        file_paths: Paths of source documents to ingest. An empty list aborts
            with a message and returns None.
        pc: Initialized Pinecone client.

    Returns:
        A retriever over the ingested documents, or None on any failure.
    """
    if not file_paths:
        print("No documents provided.")
        return None

    print("Processing documents...")
    # Scratch directory for the intermediate markdown + parquet artifacts.
    temp_dir = tempfile.mkdtemp()
    markdown_path = Path(temp_dir) / "combined.md"
    parquet_path = Path(temp_dir) / "documents.parquet"
    try:
        # load_documents may return a different path than requested, so rebind.
        markdown_path = load_documents(file_paths, output_path=markdown_path)
        chunks = process_chunks(markdown_path, chunk_size=256, threshold=0.6)
        parquet_path = save_to_parquet(chunks, parquet_path)
        ingest_data(
            pc=pc,
            parquet_path=parquet_path,
            text_column="text",
            pinecone_client=pc,
        )
        retriever = get_retriever(pc)
        print("Documents processed successfully!")
        return retriever
    except Exception as e:
        print(f"Error processing documents: {str(e)}")
        return None
    finally:
        # Remove the whole scratch directory in one call. The original
        # remove/remove/rmdir under a bare `except: pass` swallowed
        # KeyboardInterrupt/SystemExit and leaked the directory whenever any
        # extra file was present; ignore_errors=True keeps cleanup best-effort
        # without those problems.
        shutil.rmtree(temp_dir, ignore_errors=True)
def main():
    """Interactive CLI: collect keys and documents, then chat over them."""
    # Strip stray whitespace — keys pasted into a terminal often carry a
    # trailing space/newline that would make an otherwise-valid key fail.
    pinecone_api_key = input("Enter your Pinecone API key: ").strip()
    openai_api_key = input("Enter your OpenAI API key: ").strip()

    # Initialize clients; each helper prints its own error on failure.
    pc = initialize_pinecone(pinecone_api_key)
    if not pc:
        return
    llm = initialize_llm(openai_api_key)
    if not llm:
        return

    # Collect document paths until a blank line.
    print("\nEnter the paths to your documents (one per line).")
    print("Press Enter twice when done:")
    file_paths = []
    while True:
        path = input().strip()
        if not path:
            break
        if os.path.exists(path):
            file_paths.append(path)
        else:
            print(f"Warning: File {path} does not exist")

    # Process documents; bail out if ingestion failed.
    retriever = process_documents(file_paths, pc)
    if not retriever:
        return

    # Chat loop.
    print("\nChat with your documents! Type 'exit' to quit.")
    while True:
        # Strip first so "exit " (trailing space) still quits.
        question = input("\nYou: ").strip()
        if question.lower() == 'exit':
            print("Goodbye!")
            break
        if not question:
            # Ignore empty lines instead of sending a blank query to the LLM.
            continue
        try:
            response = run_adaptive_rag(
                retriever=retriever,
                question=question,
                llm=llm,
                top_k=5,
                enable_websearch=False,
            )
            print("\nAssistant:", response)
        except GraphRecursionError:
            print("\nAssistant: I cannot find a sufficient answer to your question in the provided documents. Please try rephrasing your question or ask something else about the content of the documents.")
        except Exception as e:
            print(f"\nError: {str(e)}")


if __name__ == "__main__":
    main()