########################################################################
import pandas as pd
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import openai
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Configure API keys
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
openai.api_key = os.getenv("MY_OPENAI_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# # Pre-processing of the input data:
# hotel_df = pd.read_csv(r"C:\Meera\Jio\capstone\KKT_LLMRec\project\TravelGPT\ragnew\cleaned_hotel_info.csv", encoding='utf-8')
# attractions_data = json.load(open(r'C:\Meera\Jio\capstone\KKT_LLMRec\project\TravelGPT\ragnew\attractions_details_batch1.json'))
# reviews_data = json.load(open(r'C:\Meera\Jio\capstone\KKT_LLMRec\project\TravelGPT\ragnew\attractions_reviews_batch1.json'))

# # Preprocess data
# hotel_df['location'] = hotel_df['location'].apply(lambda x: tuple(map(float, x.strip('()').split(','))))
# hotel_df['amenities'] = hotel_df['amenities'].apply(lambda x: x.split(', '))
# attraction_df = pd.DataFrame(attractions_data)
# review_df = pd.DataFrame(reviews_data)

# # Combine data
# combined_df = pd.concat([hotel_df, attraction_df, review_df], ignore_index=True, sort=False)
# combined_df['combined_text'] = combined_df.apply(lambda row: ' '.join(map(str, row.values)), axis=1)

# # # Initialize the SentenceTransformer model
# # model = SentenceTransformer('all-MiniLM-L6-v2')

# # # Create embeddings for the combined text
# # combined_df['embedding'] = list(model.encode(combined_df['combined_text'].tolist()))

# # # Save the embeddings to a .csv file
# # combined_df.to_csv('combined_df_with_embeddings2.csv', index=False)


# Load the data
def load_data():
    # Load the precomputed embeddings
    combined_df = pd.read_csv('combined_df_with_embeddings2.csv')

    # Convert the string representation of an embedding into a NumPy array
    def convert_to_array(embedding_str):
        return np.fromstring(embedding_str.strip('[]'), sep=' ')

    # Convert each value in the 'embedding' column to a NumPy array
    combined_df['embedding'] = combined_df['embedding'].apply(convert_to_array)
    return combined_df


def retrieve_similar(combined_df, query, top_k=5):
    # Initialize the SentenceTransformer model and embed the query
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = model.encode(query)

    # Score every document by cosine similarity to the query and keep the top_k
    combined_df['similarity'] = combined_df['embedding'].apply(
        lambda x: cosine_similarity([query_embedding], [x]).item()
    )
    top_docs = combined_df.nlargest(top_k, 'similarity')
    return top_docs


def gen_response(context):
    # Generate the answer with Gemini using the assembled prompt
    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(context)
    return response.text


def rag_bot(query, combined_df, chat_history):
    # Retrieve the most relevant documents and flatten them into prompt context
    retrieved_docs = retrieve_similar(combined_df, query)
    retrieved_info = '\n'.join(retrieved_docs['combined_text'].tolist())

    # Serialize the running conversation for the prompt
    chat_history_text = ' '.join(
        [f"User: {entry['query']}\nSystem: {entry['response']}" for entry in chat_history]
    )

    context = f"""
    You are TravelGPT, a friendly and knowledgeable travel assistant with a knack for finding the best travel recommendations.
    Your goal is to help customers find hotels and travel destinations that perfectly match their preferences.
    For each question, provide at least two personalized recommendations, including details on:
    - Price range
    - Country
    - Hotel ratings
    - Unique experiences offered by the hotel or destination
    - Nearby tourist attractions
    Your responses should be engaging and informative, highlighting the unique aspects and benefits of each recommendation.
    Always sort the answers so the cheapest option comes first, and ensure that all information is accurate and fair.
    Your responses should be based on the following retrieved information:
    {retrieved_info}
    If you do not have enough information to provide an accurate answer, admit that rather than giving incorrect details.
    Do not provide information beyond the retrieved information.
    Here's the chat history so far:
    {chat_history_text}
    Input from the user: {query}
    Your detailed and engaging response:
    """
    response = gen_response(context)

    # Append the current interaction to the chat history
    chat_history.append({'query': query, 'response': response})
    return response, chat_history
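

# ---------------------------------------------------------------------------
# Minimal usage sketch (an assumption, not part of the original pipeline).
# It presumes that 'combined_df_with_embeddings2.csv' has already been
# produced by the commented-out preprocessing steps above and that a valid
# GOOGLE_API_KEY is available in the .env file. The sample query below is
# purely illustrative.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    combined_df = load_data()
    chat_history = []

    # Hypothetical query used only to demonstrate the call pattern
    sample_query = "Find me a beachfront hotel in Bali with great reviews."
    answer, chat_history = rag_bot(sample_query, combined_df, chat_history)
    print(answer)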