import os

import numpy as np
import pandas as pd
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings


def create_Doc(data: pd.DataFrame):
    """Convert each row of *data* into a LangChain ``Document``.

    Args:
        data: DataFrame that provides a ``lyric`` column (used as the
            document text) and a ``title`` column (stored in the metadata).

    Returns:
        A list with one ``Document`` per row, in row order. Each document's
        metadata is ``{"name": <title>, "id": <row index label>}``.

    Raises:
        KeyError: If a row has no ``lyric`` or ``title`` entry.
    """
    # Columns are read by label rather than by attribute so that a Series
    # attribute or method with the same name can never shadow the column.
    return [
        Document(
            page_content=row["lyric"],
            metadata={"name": row["title"], "id": index},
        )
        for index, row in data.iterrows()
    ]


def load_embedding(
    model_name: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
):
    """Create a ``HuggingFaceEmbeddings`` instance for *model_name*.

    Args:
        model_name: Identifier of the embedding model to load. Defaults to
            the multilingual MiniLM paraphrase model.

    Returns:
        The configured ``HuggingFaceEmbeddings`` object.
    """
    # The argument is forwarded as-is; previously it was ignored and the
    # default model name was always used.
    return HuggingFaceEmbeddings(model_name=model_name)


def load_vectorstore(documents, embeddings):
    """Build a Chroma vector store from *documents*.

    Args:
        documents: Documents to index (for example the output of
            ``create_Doc``).
        embeddings: Embedding object passed to Chroma as ``embedding``
            (for example the output of ``load_embedding``).

    Returns:
        The ``Chroma`` vector store returned by ``Chroma.from_documents``.
        No other options (such as a persistence directory) are passed here.
    """
    return Chroma.from_documents(documents, embedding=embeddings)


def process(list_text, vectorstore, search_type: str = "mmr"):
    """Run every query in *list_text* through a retriever on *vectorstore*.

    Args:
        list_text: Iterable of query strings.
        vectorstore: Vector store exposing ``as_retriever`` (for example the
            output of ``load_vectorstore``).
        search_type: Search strategy forwarded to ``as_retriever``.
            Defaults to ``'mmr'``.

    Returns:
        A list holding the result of ``retriever.invoke(query)`` for each
        query, in the same order as *list_text*.
    """
    # Keep the retriever: ``invoke`` is provided by the retriever object,
    # and *search_type* only takes effect when queries go through it.
    retriever = vectorstore.as_retriever(search_type=search_type)
    return [retriever.invoke(text) for text in list_text]