Spaces:
Running
Running
File size: 643 Bytes
b4b5bdf a3a378d b4b5bdf 0b9f9a6 a3a378d fc1544a 1203b67 2f93ee4 51727c4 b4b5bdf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
import pandas as pd
from buster.documents_manager import DeepLakeDocumentsManager
if __name__ == "__main__":
    # Script entry point: load a CSV of document chunks, discard incomplete
    # rows, add the remainder to a DeepLake-backed documents manager, and
    # zip the result.

    # Run configuration (hard-coded for this script).
    vector_store_path = "/Users/louis/Downloads/wiki_tai_langchain_hf_llm"
    chunk_file = "./llm_course.csv"
    overwrite = False

    # Columns the documents manager is told to require.
    required = ["url", "source", "content", "title"]

    # Load the chunks and report how many rows survive the NaN filter.
    chunks = pd.read_csv(chunk_file)
    rows_before = len(chunks)
    print(f"before drop: {rows_before}")

    # dropna() with default arguments removes every row that has a missing
    # value in ANY column, not only in the required ones.
    chunks = chunks.dropna()
    rows_after = len(chunks)
    print(f"after drop: {rows_after}")

    manager = DeepLakeDocumentsManager(
        vector_store_path,
        overwrite=overwrite,
        required_columns=required,
    )

    # NOTE(review): batch_add presumably embeds and stores each row in the
    # vector store -- confirm against the buster documentation.
    manager.batch_add(chunks)

    # to_zip() is expected to return the location of the archive it writes.
    archive_path = manager.to_zip()
    print(f"Contents zipped to: {archive_path}")
|