"""Refresh cached embedding pickles with reviewed word mappings from the DB.

For each embedding pickle file, every reviewed (input_word -> dictionary_word)
mapping is re-embedded with the fast similarity model and merged into the
cached data, which is then written back to the same file.
"""

import pickle
import os

import pandas as pd
import psycopg2
from psycopg2.extras import DictCursor

from similarity_fast import SimilarityFast
from utils import generate_embedding
from db.db_utils import get_connection


def load_pickle(file_path):
    """Load and return the object stored in *file_path*.

    Raises:
        FileNotFoundError: if no file exists at *file_path*.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"No pickle file found at {file_path}")
    with open(file_path, 'rb') as f:
        return pickle.load(f)


def save_pickle(data, file_path):
    """Serialize *data* to *file_path*, overwriting any existing file."""
    with open(file_path, 'wb') as f:
        pickle.dump(data, f)


def update_data(data, new_data):
    """Merge *new_data* into *data* in place and return *data*."""
    data.update(new_data)
    return data


pickle_file_paths = [
    './embeddings/fast/sentence-transformers-all-mpnet-base-v2.pkl',
    './embeddings/slow/sentence-transformers-all-mpnet-base-v2.pkl',
]

# Fetch the mappings, then release the DB resources before the (slow)
# embedding work begins.  NOTE: the query selects mappings that HAVE been
# reviewed (reviewed = 1) — only vetted mappings go into the caches; the
# previous comment claimed the opposite.
db_conn = get_connection()
try:
    with db_conn.cursor(cursor_factory=DictCursor) as db_cursor:
        db_cursor.execute(
            "SELECT input_word, dictionary_word FROM mappings WHERE reviewed = 1"
        )
        results = db_cursor.fetchall()
finally:
    db_conn.close()

# Load the embedding model once; it is expensive and identical for every file.
algo_fast = SimilarityFast(None)

for pickle_file_path in pickle_file_paths:
    data = load_pickle(pickle_file_path)

    # Re-embed each reviewed input word and pair it with its dictionary word.
    new_entries = {
        input_word: {
            'v': generate_embedding(algo_fast.model, input_word),
            'd': dictionary_word,
        }
        for input_word, dictionary_word in results
    }

    updated_data = update_data(data, new_entries)
    print("Updated Data")

    # Save the updated data back to the pickle file.
    print("Saving data to pickle file...")
    save_pickle(updated_data, pickle_file_path)
    print(f"Data saved to {pickle_file_path}")