import pandas as pd
import os
import json
import openai
import ast

from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import JSONLoader

from langchain.vectorstores.chroma import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.schema import HumanMessage, AIMessage

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

import json
from pathlib import Path

from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

from fastapi import FastAPI, HTTPException, Body, File, Form, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
import uvicorn
import datetime


from dotenv import load_dotenv
# from index_store.methods.common import query_jsons, remove_unwanted_string_json
# from index_store.methods.file_handler import save_file
# from index_store.methods.gpt import get_desc_llm, revalidate_final_response_gpt, survey_answer_llm
# from index_store.methods.indexing import data_cleanup, questions_semantic_search, store_to_index_db

# --- Module configuration ---------------------------------------------------
# SECURITY: the OpenAI secret key must never be committed to source control.
# It is read from the process environment instead; `load_dotenv()` additionally
# picks it up from a local, untracked `.env` file when one exists. Any key that
# was previously committed to this file should be treated as leaked and revoked.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    print("WARNING: OPENAI_API_KEY is not set; OpenAI requests will fail until it is configured.")

# Root folder used by the file helpers to build absolute storage paths.
os.environ["ROOT_FOLDER"] = os.getcwd()

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive CORS policy - confirm this is intended outside of development.
origins = ['*']

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"]
)

@app.get("/")
async def greet():
    """Root endpoint: return a static welcome payload."""
    welcome = {"message": "Welcome to the survey bot api"}
    return welcome

@app.post("/store")
async def index_file_vector_db(file:UploadFile,iso:str):
    """
    Store an uploaded survey spreadsheet and index its questions.

    The upload is saved under ``index_store/surveys_storage_xls``, converted
    with ``data_cleanup`` and indexed with ``store_to_index_db``.

    Args:
        file (UploadFile): The uploaded survey spreadsheet.
        iso (str): Identifier passed to ``store_to_index_db`` to name the collection.

    Returns:
        dict: A confirmation message and the name of the collection that was written.

    Raises:
        HTTPException: 400 when no file (or a file without a name) was sent.
    """
    print("--------------------------------------------------------------------")
    # Fail with an explicit client error instead of reaching the code below
    # with `collection_name` undefined.
    if file is None or not file.filename:
        raise HTTPException(status_code=400, detail="No file was uploaded")

    # The filename comes from the client: keep only its final path component so
    # it cannot point outside the storage folder.
    safe_name = os.path.basename(file.filename.replace("\\", "/"))
    if not safe_name:
        raise HTTPException(status_code=400, detail="Invalid file name")

    await save_file('index_store/surveys_storage_xls',file)
    file_path=os.environ["ROOT_FOLDER"]+'/index_store/surveys_storage_xls/'+safe_name
    print(file_path)
    json_data=data_cleanup(file_path)
    collection_name=store_to_index_db(json_data,iso)

    print("File updated successfully and stored in DB. collection_name :", collection_name)
    print("--------------------------------------------------------------------")

    return {"message": "File updated successfully and stored in DB","collection_name":collection_name}

@app.post("/search")
async def get_survey_quesitons_answers(query:str,iso:str):
    """
    Answer the survey questions that are relevant to a free-text description.

    Steps: semantic search for candidate questions, derive a context from the
    description, let the model answer the candidates, map the answered
    questions back to their full records, then run a final validation pass.

    Args:
        query (str): Free-text description to search and answer for.
        iso (str): Identifier used to build the collection name.

    Returns:
        The value returned by ``revalidate_final_response_gpt``.
    """
    print("--------------------------------------------------------------------")
    print("Query:",query)
    print("Iso:",iso)

    search_result = questions_semantic_search(iso.lower()+"-surveys"+"-v1", query)
    candidate_questions = json.loads(search_result["questions_without_answers"])
    full_records = search_result["entire_questions_answeres"]

    # One question per line, in the order the search returned them.
    questions_string = '\n'.join([entry["Question"] for entry in candidate_questions])

    context = get_desc_llm(query)
    answered_questions = json.loads(survey_answer_llm(context, questions_string))
    shortlisted = query_jsons(answered_questions, full_records)

    response = revalidate_final_response_gpt(context, shortlisted)
    print(type(response))
    print("Processed Response------->",response)
    print("--------------------------------------------------------------------")
    return response

# methods for chatbot---------------------------------------------------------------
# Common methods
def query_jsons(answer_json: dict,
                whole_json: list) -> list:
    """
    Look up the full record for every answered question.

    Args:
        answer_json (dict): Mapping of question text to answer; only the keys are used.
        whole_json (list): Records (dicts) holding 'ID', 'Question' and 'Answer_Options'.

    Returns:
        list: One ``{'ID', 'Question', 'Answer_Options'}`` dict per question of
        ``answer_json`` that has a record in ``whole_json``, in ``answer_json`` order.
    """
    # Index the records once instead of rescanning the list for each question.
    # setdefault keeps the FIRST record when a question text appears twice.
    first_record_by_question = {}
    for entry in whole_json:
        if 'Question' in entry:
            first_record_by_question.setdefault(entry['Question'], entry)

    results = []
    for question in answer_json:
        matching_entry = first_record_by_question.get(question)
        if matching_entry:
            results.append({
                'ID': matching_entry['ID'],
                'Question': question,
                'Answer_Options': matching_entry['Answer_Options'],
            })
    print(results)
    return results


def remove_unwanted_string_json(json_file:dict,
                                uw_str:str)->dict:
    """
    Drop every catalog question whose answer equals ``uw_str``.

    Args:
        json_file (dict): Mapping with a 'catalogQuestions' list of dicts that have an 'answer' key.
        uw_str (str): The answer value to filter out.

    Returns:
        dict: A new ``{'catalogQuestions': [...]}`` mapping with the remaining questions.
    """
    kept = []
    for entry in json_file['catalogQuestions']:
        if entry['answer'] != uw_str:
            kept.append(entry)
    return {'catalogQuestions': kept}

# Data preprocessing methods
def _collect_question_records(df):
    """Group the sheet rows into one record per question (ID, text, joined answers)."""
    records = []
    current = None  # the question currently being assembled

    def flush(pending):
        records.append({
            'ID': int(pending['id']),
            'Question': str(pending['question']).strip(),
            'Answer_Options': ', '.join(pending['answers']),
        })

    for _, row in df.iterrows():
        if pd.notna(row['Question ID']):
            # A new question starts: emit the previous one first.
            if current is not None:
                flush(current)
            current = {'id': row['Question ID'], 'question': row['Question'], 'answers': []}
        elif current is not None and pd.notna(row['Answers']):
            # Answer rows are the rows WITHOUT a Question ID; they belong to
            # the most recent question.
            current['answers'].append(str(row['Answers']))

    # Emit the final question too - it has no following question row to trigger it.
    if current is not None:
        flush(current)
    return records


def process_excel_data(excel_file):
    """
    Convert a survey Excel sheet into ``index_store/doc_storage/<name>_output.csv``.

    The sheet is read with columns 'Question ID', 'Question' and 'Answers'.
    A row with a Question ID starts a question; the following rows without an
    ID contribute their 'Answers' value, joined with ', ', to that question.
    Rows that appear before the first question are ignored.

    Args:
        excel_file (str): Path to the Excel file.

    Returns:
        bool: True once the CSV file has been written.
    """
    base_name = os.path.splitext(os.path.basename(excel_file))[0]
    csv_file_name = f"{base_name}_output.csv"

    df = pd.read_excel(excel_file)

    # Kept from the original implementation: store_to_index_db writes its
    # loader dump into this relative folder.
    os.makedirs("doc_storage", exist_ok=True)

    output_data = _collect_question_records(df)

    # Explicit columns keep the CSV header present even when no question was found.
    df_output = pd.DataFrame(output_data, columns=['ID', 'Question', 'Answer_Options'])

    output_dir = 'index_store/doc_storage'
    os.makedirs(output_dir, exist_ok=True)
    csv_file_path = os.path.join(output_dir, csv_file_name)
    df_output.to_csv(csv_file_path, index=False)
    return True

#   # Save the DataFrame to a CSV file
#   df_output.to_csv('./doc_storage/output.csv', index=False)
#   # Read the Excel file
#   df = pd.read_csv('./doc_storage/output.csv')
#   df=df['Question']
#   # Save DataFrame as a text file
#   df.to_csv('./doc_storage/output.txt', sep='\t', index=False, header=False)


def csv_to_json(csv_file_path):
    """
    Convert a CSV file from ``index_store/doc_storage`` into a JSON file next to it.

    The JSON file has the CSV rows as records, preceded by three identical
    placeholder records.

    Args:
        csv_file_path (str): CSV file name/path, resolved relative to ``index_store/doc_storage``.

    Returns:
        bool: Always True when no exception was raised.
    """
    storage_dir = 'index_store/doc_storage'
    stem = os.path.splitext(os.path.basename(csv_file_path))[0]
    target_path = os.path.join(storage_dir, stem + '.json')

    # First pass: let pandas serialise the rows as a list of records.
    frame = pd.read_csv(os.path.join(storage_dir, csv_file_path))
    frame.to_json(target_path, orient='records')

    placeholder = {
        'id': 'No_ID',
        'dummyQ': 'dummy_value2',
        'dummyA': 'dummy_value3',
    }

    with open(target_path, 'r') as source:
        records = json.load(source)

    # Second pass: rewrite the file with three placeholder records in front.
    padded_records = [placeholder, placeholder, placeholder] + records
    with open(target_path, 'w') as sink:
        json.dump(padded_records, sink)

    return True
    # except Exception as e:
    #     print(f"Error occurred while converting CSV to JSON: {e}")
    #     return False


def get_question_data(df, question, answer_options_flag=True):
    """
    Look up one question in a DataFrame and describe it as a dictionary.

    Args:
        df (pandas.DataFrame): Frame with 'ID', 'Question' and 'Answer_Options' columns.
        question (str): Exact question text to look for.
        answer_options_flag (bool): When true, include the answer options in the result.

    Returns:
        dict or None: None when the question is absent. When no non-NaN answer
        options exist the dict uses the lowercase key 'id'; otherwise it uses
        'ID', plus 'answer_options' when ``answer_options_flag`` is set.
    """
    matches = df[df['Question'] == question]
    if matches.empty:
        return None

    options = matches['Answer_Options'].tolist()
    has_options = bool(options) and not all(pd.isna(option) for option in options)

    # Guard clause: questions without usable options get the short form.
    if not has_options:
        return {
            'id': str(matches['ID'].iloc[0]),
            'question': question
        }

    described = {
        'ID': str(matches['ID'].iloc[0]),
        'question': question,
    }
    if answer_options_flag:
        described['answer_options'] = options
    return described

# File handler methods

async def save_file(folder,file):
    """
    Write an uploaded file into ``<ROOT_FOLDER>/<folder>``.

    Args:
        folder (str): Destination folder, relative to the ROOT_FOLDER environment variable.
        file: Upload object exposing ``filename`` and an awaitable ``read()``.

    Raises:
        ValueError: If the upload has no usable file name.
    """
    folder_path = os.path.join(os.environ["ROOT_FOLDER"],folder)
    print("file name ----->",folder_path)
    # Create the folder if it doesn't exist
    os.makedirs(folder_path, exist_ok=True)

    # The filename is client-controlled: keep only its last path component so a
    # name such as "../../x" cannot escape the destination folder.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
    if not safe_name or safe_name in (".", ".."):
        raise ValueError("Uploaded file has no valid file name")

    file_path = os.path.join(folder_path, safe_name)

    with open(file_path, "wb") as f:
        f.write(await file.read())

def rem_documents(file_path):
    """Delete ``file_path``, printing the outcome instead of raising."""
    try:
        os.remove(file_path)
    except FileNotFoundError:
        print("File not found:", file_path)
    except Exception as error:
        # Best-effort cleanup: report any other failure and carry on.
        print("An error occurred while deleting the file:", str(error))
    else:
        print("File deleted successfully:", file_path)

# gpt methods
def get_keywords_chatgpt(query: str):
    """
    Ask the OpenAI chat model for the keywords of a sentence.

    A one-shot example in the conversation shows the model the expected
    list-style reply.

    Args:
        query (str): The sentence to extract keywords from.

    Returns:
        str: The raw text of the model's reply.
    """
    conversation = [
        {"role": "system", "content": "You are an expert in NLP, who can give keywords from a sentence by removing special characters. If the keyword is about a person, please include the gender."},
        {"role": "user", "content": "Women who are full-time employed and aged 50"},
        {"role": "assistant", "content": "['Female', 'Gender', 'Work', 'Occupation', 'Permanent','age']"},
        {"role": "user", "content": query + " \n please give keywords only, no additional text"},
    ]

    reply = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=conversation)

    # Report token usage for this call.
    print("get_keywords_chatgpt > Total tokens used:", reply["usage"]["total_tokens"])

    return reply["choices"][0]["message"]["content"]


def get_desc_llm(desc):
    """
    Ask the OpenAI chat model what is known about a person from a group description.

    Args:
        desc (str): The description of the group.

    Returns:
        str: The raw text of the model's reply.
    """
    reflection_prompt=f'''
    Description: you are part of a group which can be described as '{desc}'
    Thought:  What do i know about myself?
    Show detailed observations and Action.
    Thought:  What i don't know about myself?
    Don't entire show verbose, don't mention anything additional
                '''

    # One-shot example of the expected style of reply.
    example_reply = '''i am a women hence my gender is female, i have 2 children not sure about their age and gender, my relationship status is married. 
         I know about:
           my gender, children, relationship status only
         '''

    conversation = [
        {"role": "system", "content": "You are an expert at finding the context fromt the description"},
        {"role": "user", "content": "You are part of a group which can be described as 'Married women with 2 children'"},
        {"role": "assistant", "content": example_reply},
        {"role": "user", "content": reflection_prompt},
    ]

    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-0613",
        temperature=0.3,
        messages=conversation,
    )

    print("get_desc_llm > Total tokens used:", reply["usage"]["total_tokens"])
    return reply["choices"][0]["message"]["content"]


def survey_answer_llm(json_data,desc:str):
    """
    Get the model's answers and drop every entry answered with "NaN".

    Both arguments are forwarded unchanged, in this order, to ``get_answers_llm``,
    whose reply is parsed as a JSON object.

    Returns:
        str: The remaining entries as a JSON string indented by two spaces.
    """
    raw_reply = get_answers_llm(json_data, desc)

    parsed_reply = json.loads(raw_reply)
    print(parsed_reply)

    # Keep only the entries the model actually answered.
    answered = {}
    for key, value in parsed_reply.items():
        if value != "NaN":
            answered[key] = value

    answered_json = json.dumps(answered, indent=2)
    print(answered_json)
    return answered_json

def filter_questions_chatgpt(questions: dict, decs: str):
    """
    Ask the OpenAI chat model which questions the description already answers.

    Args:
        questions (dict): The questions, interpolated into the prompt as-is.
        decs (str): The description the questions are matched against.

    Returns:
        str: The raw text of the model's reply.
    """
    prompt = f'''
    Questions:
    {questions}
    what are the 5 relevant quuestions in the above json for which we already know the answers from the description below
    Description: {decs}
    '''
    print(prompt)

    conversation = [
        {"role": "system", "content": "You are an expert at selecting relavent questions from the list of qestions for which answers are available in description and returning in list format"},
        {"role": "user", "content": prompt},
    ]
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.3,
        messages=conversation,
    )

    # Report token usage for this call.
    print("filter_questions_chatgpt > Total tokens used:", reply["usage"]["total_tokens"])

    return reply["choices"][0]["message"]["content"]

def get_answers_llm(cont:str,q_json:dict):
    """
    Ask the OpenAI chat model to answer a set of questions from a context.

    Args:
        cont (str): Context text interpolated into the prompt.
        q_json (dict): Questions interpolated into the prompt as-is.

    Returns:
        str: The raw text of the model's reply.
    """
    prompt=f'''
      You will be provided with a list of questions in JSON format and should respond to all questions based on the observation provided.
      If you are not sure about the answer, please respond with "NeC" in the JSON format for that question.
      If the answer is not available, please respond with "NaN" in the JSON format for that question.
      Context:
      {cont}
      Questions:
      {q_json}
      Before answering the questions, please make sure you have read the context and questions carefully.
    check if the answer is available in the context, if not, please remove that question from the JSON response.       
    '''
    print(prompt)

    conversation = [
        {"role": "system", "content": "You are an expert at answering questions based on the context, observation and action"},
        {"role": "user", "content": "what is your age?"},
        {"role": "assistant", "content": "50"},
        {"role": "user", "content": prompt},
    ]
    reply = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0.4,
        messages=conversation,
    )

    print("get_answers_llm > Total tokens used:", reply["usage"]["total_tokens"])
    return reply["choices"][0]["message"]["content"]

def revalidate_final_response_gpt(context:str,
                                  json_response:dict
                                  )->dict:
    """
    Run a final completion pass that picks answers from the offered options.

    Args:
        context (str): Context text interpolated into the prompt.
        json_response: Questions with answer options, interpolated into the prompt as-is.

    Returns:
        The text of the first completion choice, exactly as the model produced it.
    """
    prompt=f'''
    You will be provided with a list of questions in JSON format and should select right answer from the answer_options strictly based on the context provided. 
    If the answer is not available, remove that question from the JSON response.
    If answer is similar to "No", "None", "None of the above","I don't" remove that question from the JSON response.

    Context:{context}
    Questions:{json_response}

    Response should be in below format only, don't add anything to the response other than below format:
      {{
        "catalogQuestions": 
        [
            {{"id": "42", "question": "What is your gender?", "answer": "Female"}},
            {{"id": "632", "question": "What is your relationship status?", "answer": "Single, never married"}},
        ]
        }}
    
    Before answering the questions, please make sure you have read the context and questions carefully.
    check if the answer is available in the context, if not, please remove that question from the JSON response.       
    '''
    print(prompt)

    request_options = dict(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.0,
        max_tokens=2000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    completion = openai.Completion.create(**request_options)
    completion_text = completion.choices[0].text
    print("revalidate_final_response_gpt > Total tokens used:", completion["usage"]["total_tokens"])
    return completion_text

# indexing methods

def data_cleanup(file_path):
    """
    Convert a survey Excel file to CSV and then to JSON.

    Args:
        file_path (str): Path to the Excel file.

    Returns:
        str: Relative path of the JSON file produced by ``csv_to_json``.
    """
    file_name = os.path.basename(file_path)
    print(file_name)
    # Strip only the final extension, exactly as process_excel_data does when it
    # names the CSV; splitting on the first '.' would point at a different file
    # for names such as "survey.v2.xlsx".
    stem = os.path.splitext(file_name)[0]
    process_excel_data(file_path)
    csv_to_json(stem+'_output.csv')
    return './index_store/doc_storage/'+stem+'_output.json'


def store_to_index_db(file_path,iso):
    """
    Load the records of a JSON file and store them in a persisted Chroma collection.

    Args:
        file_path (str): Path to a JSON file holding a list of records.
        iso (str): Identifier; the collection is named ``<iso lower-cased>-surveys-v1``.

    Returns:
        str: The name of the collection that was written.
    """
    # Make sure both output folders exist before anything is written to them.
    os.makedirs('./index_store/survey_storage_db', exist_ok=True)
    os.makedirs('./doc_storage', exist_ok=True)

    collection_name = iso.lower()+"-surveys"+"-v1"
    file_name = os.path.basename(file_path)

    # One document per element of the top-level JSON array.
    loader = JSONLoader(
        file_path=file_path,
        jq_schema='.[]',
        text_content=False)
    documents = loader.load()

    # Plain-text dump of the loaded documents, one per block, for inspection.
    txt_file_path = './doc_storage/'+file_name.split('.')[0]+'loader.txt'
    with open(txt_file_path, 'w', encoding='utf-8') as txt_file:
        for document in documents:
            txt_file.write(str(document))
            txt_file.write('\n')
            txt_file.write('----------------------------------------------------------')
            txt_file.write('\n')

    embeddings = OpenAIEmbeddings()
    vector_store = Chroma.from_documents(
        documents,
        embeddings,
        collection_name=collection_name,
        persist_directory="index_store/survey_storage_db",
    )
    # Save DB locally
    vector_store.persist()
    return collection_name


def _parse_keyword_reply(raw):
    """Turn the keyword model's reply into a list of non-empty keyword strings."""
    try:
        parsed = ast.literal_eval(raw.strip())
    except (ValueError, SyntaxError):
        # The reply is not a Python literal: treat it as comma-separated text.
        parsed = [part.strip(" \t\r\n'\"[]") for part in raw.split(',')]
    # A bare scalar (e.g. a single quoted word) counts as one keyword.
    if not isinstance(parsed, (list, tuple, set)):
        parsed = [parsed]
    return [str(keyword) for keyword in parsed if str(keyword).strip()]


def questions_semantic_search(collection_name,desc):
    """
    Find the stored survey questions related to a description.

    Keywords are extracted from ``desc`` with ``get_keywords_chatgpt``; each
    keyword is then used for a similarity search (k=6) against the collection,
    and the hits are de-duplicated by question ID in first-seen order.

    Args:
        collection_name (str): Name of the Chroma collection to query.
        desc (str): Free-text description to search for.

    Returns:
        dict: ``entire_questions_answeres`` holds the list of
        ``{'ID', 'Question', 'Answer_Options'}`` dicts, and
        ``questions_without_answers`` holds a JSON string of the same
        questions with only 'ID' and 'Question'.
    """
    embedding = OpenAIEmbeddings()
    vector_store = Chroma(
        collection_name=collection_name,
        embedding_function=embedding,
        persist_directory="./index_store/survey_storage_db",
    )

    keywords = _parse_keyword_reply(get_keywords_chatgpt(desc))
    print("keywords---------------->",keywords)

    required_keys = ("ID", "Question", "Answer_Options")
    question_list = []
    question_ids = set()

    for keyword in keywords:
        query = f"questions related to keyword '{keyword}'"
        docs = vector_store.similarity_search(query, k=6)

        for doc in docs:
            question_json = json.loads(doc.page_content)

            # csv_to_json prepends placeholder records that only carry
            # 'id'/'dummyQ'/'dummyA'; skip those (and any other record that is
            # not a question) instead of failing with KeyError.
            if not isinstance(question_json, dict):
                continue
            if any(key not in question_json for key in required_keys):
                continue

            question_id = question_json["ID"]
            if question_id in question_ids:
                continue
            question_ids.add(question_id)
            question_list.append({
                "ID": question_id,
                "Question": question_json["Question"],
                "Answer_Options": question_json["Answer_Options"],
            })

    # Same questions reduced to ID and text, serialised as a JSON string.
    questions_without_answers = json.dumps(
        [{"ID": question["ID"], "Question": question["Question"]} for question in question_list]
    )

    return {
        "entire_questions_answeres": question_list,
        "questions_without_answers": questions_without_answers,
    }