# (Page residue captured with this file: "Spaces: Sleeping" — not part of the program.)
# Standard library.
import os

# Project configuration: the OpenAI key and the base directory under which
# the vector store is kept.
from config import OPENAI_API_KEY, file_Directory

# LangChain building blocks for loading, splitting, embedding and indexing.
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma

import pandas as pd

# Export the configured key through the process environment so that
# OpenAIEmbeddings() can be constructed below without an explicit key argument.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
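# One-off preprocessing, kept for reference: read the ONDC sample workbook,
# keep only its "name" column, and write it to data/data.csv (the file that
# create_vector() loads).
# df = pd.read_excel(r"/home/vrush/Catalog-Digitization-/src/module/data/Catalog Digitization/ONDC Test Data _ Images/ONDCSampleData.xlsx")
# df_new = pd.DataFrame(columns=["id", "name"])
# df_new = df['name']
# df_new.to_csv(r"data/data.csv", index=False)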
def create_vector():
    """Build and persist a Chroma vector store from ``data/data.csv``.

    The CSV is loaded with ``CSVLoader``, the loaded documents are split with
    a ``CharacterTextSplitter`` (chunk size 1000, no overlap), and the
    resulting chunks are embedded with ``OpenAIEmbeddings`` and written to
    ``<file_Directory>/vectorstore``. The directory is created if missing.

    Returns:
        None. The only effect is the persisted vector store on disk.
    """
    loader = CSVLoader(file_path="data/data.csv")
    docs = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    documents = text_splitter.split_documents(docs)

    db_path = os.path.join(file_Directory, "vectorstore")
    os.makedirs(db_path, exist_ok=True)

    embeddings = OpenAIEmbeddings()
    # Index the split chunks rather than the raw loaded documents; otherwise
    # the splitter above has no effect on what gets embedded.
    # NOTE(review): running this again against an existing store presumably
    # adds the same rows a second time — confirm before re-running.
    Chroma.from_documents(documents, embeddings, persist_directory=db_path)
def search(query):
    """Print and return the stored document most similar to *query*.

    Opens the Chroma store persisted under ``<file_Directory>/vectorstore``,
    embeds *query* with ``OpenAIEmbeddings`` and runs a similarity search by
    vector against the store.

    Args:
        query: Text to look up in the vector store.

    Returns:
        The ``page_content`` of the first result, which is also printed, or
        ``None`` (printing nothing) when the search yields no results.
    """
    embeddings = OpenAIEmbeddings()
    db_path = os.path.join(file_Directory, "vectorstore")
    db = Chroma(persist_directory=db_path, embedding_function=embeddings)

    # Reuse the embeddings client that backs the store instead of building a
    # second one just for the query.
    embedding_vector = embeddings.embed_query(query)
    docs = db.similarity_search_by_vector(embedding_vector)

    # An empty result list would otherwise raise IndexError on docs[0].
    if not docs:
        return None

    best_match = docs[0].page_content
    print(best_match)
    return best_match
def _run_demo():
    """Rebuild the vector store, then run a single sample lookup."""
    create_vector()
    search("Choco Creme Wafers")


# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    _run_demo()