import os
import json
import time
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from Levenshtein import distance
from tqdm import tqdm
from db.db_utils import get_connection

# For any unreviewed mappings, we ask ChatGPT to consider:
# 1. The similar_words list
# 2. Similar words from the dictionary with a small Levenshtein distance
# ChatGPT should confirm that the current mapping is the best one. If not, it should provide the better mapping.
# If it's a Non-Food Item, we should confirm that.
# If it's a homogeneous or heterogeneous mixture, we should confirm that.

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

output_file_path = f'./audits/{int(time.time())}.csv'


def update_csv(results):
    # Write the audit trail (original vs. new mapping) to a timestamped CSV
    df_results = pd.DataFrame(results, columns=['input_word', 'original_dictionary_word', 'new_dictionary_word'])
    df_results.to_csv(output_file_path, index=False)


def find_close_levenshtein_words(input_word, dictionary, threshold=3):
    # Collect dictionary words within `threshold` Levenshtein distance of the input word
    return [word for word in dictionary if distance(input_word, word) <= threshold]


def query_gpt(food_item, dictionary_word, similar_words):
    line_separated_words = '\n'.join(similar_words)
    prompt = (
        f"""I have a particular food item and a mapping to a USDA word. Can you confirm if the food item is most similar to the mapping? Generally, you should prefer the mapped word, but if you believe there is a better fit, please provide it. I will also provide a list of other similar words that could be a better fit. This is important: only return a word from the list of words I provide. If it's not a food item or pet food, return 'Non-Food Item'. You should respond in JSON format with an object that has the key `guess`, and the value is the most similar food item.
The food item is: "{food_item}"
It has been mapped to: "{dictionary_word}"
Similar words:
{line_separated_words}"""
    )

    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        model="gpt-3.5-turbo-1106",
        response_format={"type": "json_object"},
    )

    response = completion.choices[0].message.content
    parsed = parse_response(response)
    print(f"Q: '{food_item}'")
    print(f"A: '{parsed}'")
    print()
    return parsed


# Parse the GPT response and extract the `guess` value
def parse_response(response):
    try:
        result = json.loads(response)
        return result['guess']
    except (json.JSONDecodeError, KeyError) as e:
        print(f"Error parsing response: {response} - {e}")
        return None


db_conn = get_connection()
db_cursor = db_conn.cursor()

# Load the dictionary
db_cursor.execute("SELECT description FROM dictionary")
dictionary = [item[0] for item in db_cursor.fetchall()]

# Select all mappings that have not been reviewed
db_cursor.execute("SELECT input_word, dictionary_word, similar_words FROM mappings WHERE reviewed = 0")
results = db_cursor.fetchall()

# Sanity check: ensure the dictionary loaded as expected before we start
assert "Soft drink, NFS" in dictionary, "Dictionary did not load correctly"

# Iterate through each unreviewed row and ask ChatGPT to compare the input_word to the dictionary_word
csv_data = []
for row in tqdm(results):
    input_word = row[0]
    dictionary_word = row[1]
    similar_words = [item.strip() for item in row[2].split('|')]

    # Find dictionary words with a small Levenshtein distance from the input_word
    levenshtein_words = find_close_levenshtein_words(input_word, dictionary)

    print(f"Input: {input_word}")
    print(f" - dictionary_word: {dictionary_word}")
    print(f" - similar_words: {similar_words}")
    print(f" - levenshtein_words: {levenshtein_words}")

    # Concatenate the similar_words and levenshtein_words, removing duplicates
    all_words = list(set(similar_words + levenshtein_words))

    response = query_gpt(input_word, dictionary_word, all_words)
    if response:
        csv_data.append({
            'input_word': input_word,
            'original_dictionary_word': dictionary_word,
            'new_dictionary_word': response
        })

        if response == dictionary_word and response in dictionary:
            # The existing mapping is confirmed, so just mark it reviewed
            print(" - Mapping is correct")
            db_cursor.execute("UPDATE mappings SET reviewed = 1 WHERE input_word = ?", (input_word,))
            db_conn.commit()
        elif response in dictionary:
            # Replace dictionary_word with the suggested mapping and mark it reviewed
            print(f" - Updating mapping with {response}")
            db_cursor.execute("UPDATE mappings SET dictionary_word = ?, reviewed = 1 WHERE input_word = ?", (response, input_word))
            db_conn.commit()
        else:
            print(f" - Response {response} is not in the dictionary")

update_csv(csv_data)
db_conn.close()