import re
import os
import csv
import json
import time
import heapq
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from Levenshtein import distance
from tqdm import tqdm
from db.db_utils import get_connection
from ask_gpt import query_gpt
from utils import generate_embedding, cosine_similarity
from sentence_transformers import SentenceTransformer, util

# For any unreviewed mappings, we ask chatgpt to consider:
# 1. The similar_words list
# 2. Similar words from the dictionary based on small levenstein distance

# ChatGPT should confirm that the current mapping is the best one. If not, they should provide the better mapping.
# If its a Non-Food Item, we should confirm that
# If it's a homogenous or hetergeneous mixture, we should confirm that

load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

output_file_path = f'./audits/{int(time.time())}.csv'

model_name = 'sentence-transformers/all-mpnet-base-v2'
model = SentenceTransformer(model_name)

def reverse_string(s):
    # remove all commas, and reverse the words in the string
    s = s.replace(',', '')
    return ' '.join(s.split()[::-1])

def remove_misc_words(text):
    text = text.replace('raw', '').replace('fresh', '').replace('canned', '').replace('prepared', '').replace('with', '').replace('and', '').replace('or', '').replace('in', '').replace('NFS', '').strip()
    text = re.sub(r'\(.*?\)', '', text)
    return text

def compare_embeddings(old_dictionary_word, new_dictionary_word):
    old_dictionary_word = remove_misc_words(old_dictionary_word)
    new_dictionary_word = remove_misc_words(new_dictionary_word)
    old_embedding = generate_embedding(model, old_dictionary_word)
    new_embedding = generate_embedding(model, new_dictionary_word)
    cosine_similarity_score = cosine_similarity(old_embedding, new_embedding)
    return cosine_similarity_score

def update_csv(results):
    df_results = pd.DataFrame(results, columns=['input_word', 'original_dictionary_word', 'new_dictionary_word',])    
    df_results.to_csv(output_file_path, index=False)

def find_close_levenshtein_words(input_word, dictionary, threshold=7):
    # Calculate Levenshtein distances for each word in the dictionary
    close_words = [word for word in dictionary if distance(input_word, word) <= threshold]
    return close_words

def query_gpt(food_item, dictionary_word, similar_words):
    line_separated_words = '\n'.join(similar_words)

    prompt = (
      f"""I have a particular food item and a mapping to a USDA word. Can you confirm if the food item is most similar to the mapping?

      Generally, you should prefer the mapped word, but if you believe there is a better fit from provided list of similar words, please specify it.

      This is important: only return a word from the list of words I provide.

      If it's not a food item or pet food, return 'Non-Food Item'.

      You should respond in JSON format with an object that has the key `guess`, and the value is the most similar food item.

      The food item is: "{food_item}"
      It has been mapped to: "{dictionary_word}"

      Similar words:
      {line_separated_words}"""
    )

    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
    )
    response = completion.choices[0].message.content
    parsed = parse_response(response)
    print(f"Q: '{food_item}'")
    print(f"A: '{parsed}'")
    return parsed

# Define the function to parse the GPT response
def parse_response(response):
    try:
        result = json.loads(response)
        return result['guess']
    except (json.JSONDecodeError, KeyError) as e:
        print(f"Error parsing response: {response} - {e}")
        return None


db_conn = get_connection()
db_cursor = db_conn.cursor()

# Load the dictionary
db_cursor.execute("SELECT description FROM dictionary")
dictionary = db_cursor.fetchall()
dictionary = [item[0] for item in dictionary]

# select all mappings that have not been reviewed
db_cursor.execute("SELECT input_word, dictionary_word, similar_words, is_food FROM mappings WHERE gpt_reviewed = false and is_food = true and dictionary_word != 'Heterogeneous Mixture'")
results = db_cursor.fetchall()

# iterate through each row, grab the input_word and ask chatgpt to compare it to the dictionary_word

csv_data = []
for row in results:
    input_word = row[0]
    dictionary_word = row[1]
    similar_words = [item.strip() for item in row[2].split('|')] if row[2] else []
    is_food = row[3]
    
    # find words from the dictionary list based on small levenstein distance between input_word and each word in the dictionary
    levenshtein_words = find_close_levenshtein_words(input_word, dictionary)
    print()
    print(f"Input: {input_word}")
    print(f" - dictionary_word: {dictionary_word}")
    print(f" - similar_words: {similar_words}")
    print(f" - levenshtein_words: {levenshtein_words}")

    # concatenate the similar_words and levenshtein_words
    all_words = similar_words + levenshtein_words
    all_words = list(set(all_words))  # remove duplicates
    response = query_gpt(input_word, dictionary_word, all_words)
    if response:
        new_row = None
        if response == dictionary_word and response in dictionary:
            print(f" - Mapping is correct")
            db_cursor.execute("UPDATE mappings SET gpt_reviewed = true WHERE input_word = %s", (input_word,))
            db_conn.commit()
        else:
            # We should update the mapping in the database
            # We should replace dictionary_word with response
            # We should set reviewed to 1
            # first confirm that the response is in the dictionary
            if response in dictionary:

                # If the response is similar to the original dictionary word, lets just skip it
                compare_score1 = compare_embeddings(dictionary_word, response)
                compare_score2 = compare_embeddings(reverse_string(dictionary_word), reverse_string(response))

                if compare_score1 > 0.8 or compare_score2 > 0.8:
                    print(f" - Mapping is already similar to the dictionary word")
                    db_cursor.execute("UPDATE mappings SET gpt_reviewed = true WHERE input_word = %s", (input_word,))
                    db_conn.commit()
                    continue
                else:
                    print(f" - Mapping is not similar to the dictionary word")
                    print(f" - Cosine Similarity1: {compare_score1}")
                    print(f" - Cosine Similarity2: {compare_score2}")

                # prompt the user if we want to make the change or not
                print("------")
                print("Updating mapping to the following:")
                print(f" - input_word: {input_word}")
                print(f" - original_dictionary_word: {dictionary_word}")
                print(f" - new_dictionary_word: {response}")
                print("------")

                if response == 'Non-Food Item' and is_food == False:
                    # We can skip this one because it's already marked as a Non-Food Item
                    print(" - Mapping is already marked as a Non-Food Item")
                    confirm = 'y'
                else:
                    confirm = input("Press 'y' to confirm, 'i' to ignore, 'd' to delete, 'm' for mixture, any other key to skip: ")

                if confirm.lower() == 'y':
                    if response == 'Non-Food Item':
                        sql = "UPDATE mappings SET dictionary_word = %s, is_food = FALSE, description = 'Non-Food Item', gpt_reviewed = true, reviewed = true WHERE input_word = %s"
                    else:
                        sql = "UPDATE mappings SET dictionary_word = %s, gpt_reviewed = true, reviewed = true, is_food = true WHERE input_word = %s"
                    
                    print(f" - Updating mapping with {response}")
                    db_cursor.execute(sql, (response, input_word))
                    db_conn.commit()
                    new_row = {
                        'input_word': input_word,
                        'original_dictionary_word': dictionary_word,
                        'new_dictionary_word': response
                    }
                elif confirm.lower() == 'i':
                    print(f" - Ignoring mapping")
                    sql = "UPDATE mappings SET ignore = true, gpt_reviewed = true, reviewed = true WHERE input_word = %s"
                    db_cursor.execute(sql, (input_word,))
                    db_conn.commit()
                elif confirm.lower() == 'd':
                    print(f" - Deleting mapping")
                    sql = "DELETE FROM mappings WHERE input_word = %s"
                    db_cursor.execute(sql, (input_word,))
                    db_conn.commit()
                elif confirm.lower() == 'm':
                    print(f" - Heterogeneous Mixture")
                    sql = "UPDATE mappings SET gpt_reviewed = true, reviewed = true, dictionary_word = 'Heterogeneous Mixture', is_food = true WHERE input_word = %s"
                    db_cursor.execute(sql, (input_word,))
                    db_conn.commit()
                else:
                    db_cursor.execute("UPDATE mappings SET gpt_reviewed = true, reviewed = true WHERE input_word = %s", (input_word,))
                    db_conn.commit()
            else:
                print(f" - Response {response} is not in the dictionary")

        if new_row:
            csv_data.append(new_row)
        update_csv(csv_data)

db_conn.close()