# tsrivallabh's picture
# Synced repo using 'sync_with_huggingface' Github Action
# 11cc0d3 verified
# raw
# history blame contribute delete
# 795 Bytes
import chromadb
from chromadb.utils import embedding_functions
# Inspect a Chroma collection: print how many records it holds, then dump
# every record's id, document text, and metadata to stdout.

# Location of the on-disk Chroma store and the collection to inspect.
# Adjust these as needed.
CHROMA_PATH = "chroma_db"
COLLECTION_NAME = "pib_titles"

# Open the persistent store and look up the existing collection by name.
client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = client.get_collection(
    name=COLLECTION_NAME,
    # NOTE(review): presumably the same model the collection was built
    # with -- confirm against the ingestion code.
    embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="all-MiniLM-L6-v2"
    ),
)

# Retrieve all documents and metadata (ids are always returned).
all_docs = collection.get(include=["documents", "metadatas"])
print("Total documents:", len(all_docs["ids"]))

# Walk the three result lists in lockstep; numbering shown to the user
# starts at 1.
records = zip(all_docs["ids"], all_docs["documents"], all_docs["metadatas"])
for doc_number, (doc_id, doc, meta) in enumerate(records, start=1):
    print(f"\n--- Document {doc_number} ---")
    print("ID:", doc_id)
    print("Document:", doc)
    print("Metadata:", meta)