| | from pathlib import Path |
| | from langchain_openai import OpenAIEmbeddings |
| | from langchain_chroma import Chroma |
| | from dotenv import load_dotenv |
| | import os |
| | import sqlite3 |
| |
|
# Load variables from a local .env file into the process environment at
# import time, so later os.getenv() lookups can see them.
load_dotenv()
| |
|
| |
|
def test_load_embeddings():
    """Print a diagnostic report about the persisted Chroma embeddings store.

    The store is expected under ``<cwd>/data/processed/embeddings/chroma``.
    Two independent checks are run and reported on stdout:

    1. The raw ``chroma.sqlite3`` file is opened read-only and every table
       is listed together with its row count.
    2. The store is opened through the ``Chroma`` wrapper and the
       ``"langchain"`` collection's size and a sample of it are printed.

    Failures in either check are printed rather than raised, so one broken
    check never hides the result of the other. Returns ``None``.
    """
    print("=== Testing Embeddings Load ===")

    chroma_dir = Path.cwd() / "data" / "processed" / "embeddings" / "chroma"

    print("\nTesting SQLite database:")
    _inspect_sqlite(chroma_dir / "chroma.sqlite3")

    print("\nTesting ChromaDB load:")
    _inspect_chroma(chroma_dir, "langchain")


def _count_rows(cursor, table_name):
    """Return the number of rows in *table_name* using *cursor*."""
    # Table names cannot be bound as SQL parameters; quote the identifier
    # (doubling embedded quotes) so unusual names do not break the query.
    quoted = '"' + table_name.replace('"', '""') + '"'
    cursor.execute(f"SELECT COUNT(*) FROM {quoted};")
    return cursor.fetchone()[0]


def _inspect_sqlite(db_path):
    """Print the tables found in the SQLite file *db_path* and their sizes."""
    # A plain sqlite3.connect() would create an empty database when the file
    # is missing; a diagnostic must not modify the store it inspects.
    if not db_path.is_file():
        print(f"SQLite Error: database file not found: {db_path}")
        return

    conn = None
    try:
        # mode=ro guarantees the inspection cannot write to the database.
        conn = sqlite3.connect(f"{db_path.resolve().as_uri()}?mode=ro", uri=True)
        cursor = conn.cursor()

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()
        print(f"Found tables: {tables}")

        for (table_name,) in tables:
            # Handle errors per table so one unreadable table does not hide
            # the counts of the remaining ones.
            try:
                count = _count_rows(cursor, table_name)
            except sqlite3.Error as e:
                print(f"Table {table_name}: could not count records ({e})")
                continue
            print(f"Table {table_name}: {count} records")
    except sqlite3.Error as e:
        print(f"SQLite Error: {e}")
    finally:
        if conn is not None:
            conn.close()


def _inspect_chroma(chroma_dir, collection_name):
    """Open *chroma_dir* through Chroma and print details of *collection_name*."""
    # Broad catch on purpose: this is the reporting boundary of a diagnostic
    # script and any failure should be printed, not raised.
    try:
        embeddings = OpenAIEmbeddings(
            openai_api_key=os.getenv("OPENAI_API_KEY"),
        )

        db = Chroma(
            persist_directory=str(chroma_dir),
            embedding_function=embeddings,
            collection_name=collection_name,
        )
        print("\nChroma instance created")

        # NOTE(review): _client is a private attribute of the wrapper and may
        # change between library versions; kept because the original relies
        # on it to reach the underlying client.
        client = db._client
        print(f"Collection names: {client.list_collections()}")

        collection = client.get_collection(collection_name)
        print(f"\nCollection count: {collection.count()}")
        print(f"Collection peek: {collection.peek()}")
    except Exception as e:
        print(f"\nChroma Error: {e}")
| |
|
| |
|
# Run the diagnostic only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    test_load_embeddings()
| |
|