Spaces:
Paused
Paused
File size: 1,726 Bytes
9189e38 c92bac3 ee698cf 9189e38 a1c159e 9189e38 a1c159e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import pickle
import os
import pandas as pd
import psycopg2
from psycopg2.extras import DictCursor
from utils import generate_embedding
from db.db_utils import get_connection
# Pickle files that run_mappings_to_embeddings() loads, merges new entries
# into, and writes back. Paths are relative to the current working directory.
pickle_file_paths = ['./embeddings/fast/sentence-transformers-all-mpnet-base-v2.pkl']
def load_pickle(file_path):
    """Load and return the object stored in the pickle file at *file_path*.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object.

    Raises:
        FileNotFoundError: If no file exists at *file_path*.
    """
    # Open first and translate the failure (EAFP) instead of checking
    # os.path.exists() beforehand: the file could disappear between the check
    # and the open, which would surface a different error message.
    try:
        f = open(file_path, 'rb')
    except FileNotFoundError as err:
        raise FileNotFoundError(f"No pickle file found at {file_path}") from err

    with f:
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only use this on pickle files produced by this project.
        return pickle.load(f)
def save_pickle(data, file_path):
    """Pickle *data* to *file_path*, replacing any existing file atomically.

    The data is first written to a sibling ``<file_path>.tmp`` file and then
    moved over the target with ``os.replace``. Opening the target directly in
    ``'wb'`` mode would truncate it immediately, so a failure while pickling
    would destroy the previously saved contents.

    Args:
        data: Any picklable object.
        file_path: Destination path of the pickle file.

    Raises:
        Whatever ``open``, ``pickle.dump`` or ``os.replace`` raise; in that
        case the existing file at *file_path* is left untouched.
    """
    # Same directory as the target so os.replace stays on one filesystem.
    tmp_path = f"{file_path}.tmp"
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(data, f)
        os.replace(tmp_path, file_path)
    except BaseException:
        # Do not leave a partial temp file behind; then propagate the error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
def update_data(data, new_data):
    """Merge *new_data* into *data* in place and return *data*.

    Keys present in both are overwritten with the value from *new_data*.
    The returned object is the same object that was passed in as *data*,
    not a copy.
    """
    data.update(new_data)
    return data
def _fetch_reviewed_mappings():
    """Return the rows of all mappings that are reviewed and not flagged."""
    # NOTE(review): assumes get_connection() hands out a connection owned by
    # the caller, so closing it here is correct -- confirm against db_utils.
    db_conn = get_connection()
    try:
        with db_conn.cursor(cursor_factory=DictCursor) as db_cursor:
            # Select all mappings that have been reviewed and are not flagged.
            db_cursor.execute("SELECT input_word, dictionary_word FROM mappings WHERE reviewed = true and flagged = false")
            return db_cursor.fetchall()
    finally:
        # Release the connection before the (potentially slow) embedding work.
        db_conn.close()


def run_mappings_to_embeddings(model):
    """Add embeddings for reviewed mappings to every configured pickle file.

    Reads the reviewed, unflagged rows of the ``mappings`` table, builds one
    entry per ``input_word`` and merges those entries into each file listed
    in ``pickle_file_paths``.

    Args:
        model: Passed through unchanged to ``generate_embedding``.

    Returns:
        Dict keyed by ``input_word``; each value is a dict with ``'v'`` (the
        result of ``generate_embedding(model, input_word)``) and ``'d'`` (the
        row's ``dictionary_word``).

    Raises:
        FileNotFoundError: Propagated from ``load_pickle`` when a configured
            pickle file does not exist.
    """
    results = _fetch_reviewed_mappings()

    # The entries depend only on the rows and the model, not on the pickle
    # file, so build them once instead of once per file. This also keeps the
    # return value defined when pickle_file_paths is empty.
    new_entries = {}
    for row in results:
        input_word = row["input_word"]
        new_entries[input_word] = {
            'v': generate_embedding(model, input_word),
            'd': row["dictionary_word"],
        }

    for pickle_file_path in pickle_file_paths:
        data = load_pickle(pickle_file_path)
        updated_data = update_data(data, new_entries)
        print("Updated Data")

        # Save the updated data back to the pickle file
        print("Saving data to pickle file...")
        save_pickle(updated_data, pickle_file_path)
        print(f"Data saved to {pickle_file_path}")

    return new_entries
|