Spaces:
Runtime error
Runtime error
import os | |
import pandas as pd | |
from storage import query_to_df | |
from cleaning import TextCleaner | |
from sentence_transformers import SentenceTransformer | |
def main(library_name, query, max_results, model_name):
    """Build a new library on disk: query metadata plus sentence embeddings.

    A directory named ``library_name`` is created under ``./data/libraries``
    and two Feather files are written into it:

    * ``metadata.feather``   -- the DataFrame returned by ``query_to_df``.
    * ``embeddings.feather`` -- one row per encoded sentence, as produced by
      ``SentenceTransformer.encode``.

    Args:
        library_name: Name of the sub-directory to create for this library.
        query: Forwarded unchanged to ``query_to_df``.
        max_results: Forwarded unchanged to ``query_to_df``.
        model_name: Forwarded to ``SentenceTransformer`` as
            ``model_name_or_path``.

    Raises:
        FileExistsError: If the library directory already exists, so an
            existing library is never silently overwritten.
    """
    path_to_library = os.path.join("./data/libraries", library_name)
    # makedirs (rather than mkdir) also creates "./data/libraries" when it is
    # missing, e.g. on a fresh checkout. exist_ok is deliberately left at its
    # default so an already-existing library still raises FileExistsError.
    os.makedirs(path_to_library)

    ## Generate metadata from query and save
    metadata = query_to_df(query=query, max_results=max_results)
    metadata.to_feather(os.path.join(path_to_library, "metadata.feather"))

    ## Process and clean the data
    # NOTE(review): presumably yields one text string per metadata row --
    # confirm against cleaning.TextCleaner.
    sentences = TextCleaner().transform(metadata)

    ## Generate and save embeddings
    embeddings = SentenceTransformer(model_name_or_path=model_name).encode(
        sentences=sentences, show_progress_bar=True
    )
    embeddings_df = pd.DataFrame(embeddings)
    # A DataFrame built without column names gets integer labels; Feather
    # requires string column names, so convert them before writing.
    embeddings_df.columns = [str(col_name) for col_name in embeddings_df.columns]
    embeddings_df.to_feather(os.path.join(path_to_library, "embeddings.feather"))