"""Add embeddings for new dictionary entries to the stored pickle files.

Reads the descriptions listed in ``./dictionary/additions.csv``, embeds them
with the model held by ``SimilarityFast`` and merges the result into each of
the pickle files listed in ``pickle_file_paths``.
"""

import os
import pickle

import pandas as pd

from similarity_fast import SimilarityFast
from utils import generate_embedded_dictionary


def load_pickle(file_path):
    """Return the object stored in the pickle file at ``file_path``.

    Raises:
        FileNotFoundError: if ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"No pickle file found at {file_path}")

    # NOTE: unpickling can execute arbitrary code; only point this at
    # files produced by this project.
    with open(file_path, 'rb') as f:
        return pickle.load(f)


def save_pickle(data, file_path):
    """Pickle ``data`` into ``file_path``, overwriting any existing file."""
    with open(file_path, 'wb') as f:
        pickle.dump(data, f)


def update_data(data, new_data):
    """Merge ``new_data`` into ``data`` in place and return ``data``.

    Keys already present in ``data`` are overwritten by ``new_data``.
    """
    data.update(new_data)
    return data


def preprocess_dictionary_word(self, text):
    """Normalize a dictionary description.

    The text is stripped and lower-cased, the markers "raw" and "nfs" are
    removed (with their leading comma or space), and comma-separated parts
    are emitted in reverse order joined by single spaces, e.g.
    ``"Apple, red, raw"`` becomes ``"red apple"``.

    ``self`` is accepted but never used; it is kept so the signature stays
    unchanged. The script below does not call this function: it passes
    ``algo_fast.preprocess_dictionary_word`` instead.
    """
    text = text.strip().lower()
    for marker in ("raw", "nfs"):
        # Order matters: drop the ", <marker>" form before the " <marker>" one.
        text = text.replace(f", {marker}", "").replace(f" {marker}", "")

    if ',' in text:
        parts = [part.strip() for part in text.split(',')]
        text = ' '.join(reversed(parts))

    return text


# Pickle files that receive the new entries.
pickle_file_paths = [
    './embeddings/fast/sentence-transformers-all-mpnet-base-v2.pkl',
    './embeddings/slow/sentence-transformers-all-mpnet-base-v2.pkl',
]

# The model, the CSV contents and the resulting embeddings do not depend on
# which pickle file is being updated, so they are computed once up front
# instead of once per file.
algo_fast = SimilarityFast(None)

csv_file_path = './dictionary/additions.csv'
df_dictionary = pd.read_csv(csv_file_path)
dictionary = df_dictionary['description'].astype(str).tolist()

# NOTE(review): both the "fast" and the "slow" file are fed entries built
# with SimilarityFast's model and preprocessing -- confirm this is intended.
new_entries = generate_embedded_dictionary(
    dictionary,
    algo_fast.model,
    algo_fast.preprocess_dictionary_word,
)

for pickle_file_path in pickle_file_paths:
    # Load the existing data from the pickle file
    data = load_pickle(pickle_file_path)

    updated_data = update_data(data, new_entries)
    print("Updated Data")

    # Save the updated data back to the pickle file
    print("Saving data to pickle file...")
    save_pickle(updated_data, pickle_file_path)

    print(f"Data saved to {pickle_file_path}")