########################################################################
import pandas as pd
import numpy as np
import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import openai
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Configure API keys
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
openai.api_key = os.getenv("MY_OPENAI_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# # Pre-processing of the input data:
# hotel_df = pd.read_csv(r"C:\Meera\Jio\capstone\KKT_LLMRec\project\TravelGPT\ragnew\cleaned_hotel_info.csv", encoding='utf-8')
# attractions_data = json.load(open(r'C:\Meera\Jio\capstone\KKT_LLMRec\project\TravelGPT\ragnew\attractions_details_batch1.json'))
# reviews_data = json.load(open(r'C:\Meera\Jio\capstone\KKT_LLMRec\project\TravelGPT\ragnew\attractions_reviews_batch1.json'))

# # Preprocess data
# hotel_df['location'] = hotel_df['location'].apply(lambda x: tuple(map(float, x.strip('()').split(','))))
# hotel_df['amenities'] = hotel_df['amenities'].apply(lambda x: x.split(', '))
# attraction_df = pd.DataFrame(attractions_data)
# review_df = pd.DataFrame(reviews_data)

# # Combine data
# combined_df = pd.concat([hotel_df, attraction_df, review_df], ignore_index=True, sort=False)
# combined_df['combined_text'] = combined_df.apply(lambda row: ' '.join(map(str, row.values)), axis=1)

# # # Initialize the SentenceTransformer model
# # model = SentenceTransformer('all-MiniLM-L6-v2')

# # # Create embeddings for the combined text
# # combined_df['embedding'] = list(model.encode(combined_df['combined_text'].tolist()))

# # # Save the embeddings to a .csv file
# # combined_df.to_csv('combined_df_with_embeddings2.csv', index=False)


# Load the data
def load_data():
    # Load the precomputed embeddings
    combined_df = pd.read_csv('combined_df_with_embeddings2.csv')

    # Convert the string representation of an embedding into a NumPy array
    def convert_to_array(embedding_str):
        return np.fromstring(embedding_str.strip('[]'), sep=' ')

    # Convert each value in the 'embedding' column to a NumPy array
    combined_df['embedding'] = combined_df['embedding'].apply(convert_to_array)
    return combined_df


def retrieve_similar(combined_df, query, top_k=5):
    # Initialize the SentenceTransformer model and embed the query
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = model.encode(query)

    # Score every document by cosine similarity to the query and keep the top_k
    combined_df['similarity'] = combined_df['embedding'].apply(
        lambda x: cosine_similarity([query_embedding], [x]).item()
    )
    top_docs = combined_df.nlargest(top_k, 'similarity')
    return top_docs


def gen_response(context):
    # Generate the answer with Gemini using the assembled prompt
    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(context)
    return response.text


def rag_bot(query, combined_df, chat_history):
    # Retrieve the most relevant documents and flatten them into prompt context
    retrieved_docs = retrieve_similar(combined_df, query)
    retrieved_info = '\n'.join(retrieved_docs['combined_text'].tolist())

    # Serialize the running conversation for the prompt
    chat_history_text = ' '.join(
        [f"User: {entry['query']}\nSystem: {entry['response']}" for entry in chat_history]
    )

    context = f"""
    You are TravelGPT, a friendly and knowledgeable travel assistant with a knack for finding the best travel recommendations.
    Your goal is to help customers find hotels and travel destinations that perfectly match their preferences.
    For each question, provide at least two personalized recommendations, including details on:
    - Price range
    - Country
    - Hotel ratings
    - Unique experiences offered by the hotel or destination
    - Nearby tourist attractions
    Your responses should be engaging and informative, highlighting the unique aspects and benefits of each recommendation.
    Always sort the answers so the cheapest option comes first, and ensure that all information is accurate and fair.
    Your responses should be based on the following retrieved information:
    {retrieved_info}
    If you do not have enough information to provide an accurate answer, admit that rather than giving incorrect details.
    Do not provide information beyond the retrieved information.
    Here's the chat history so far:
    {chat_history_text}
    Input from the user: {query}
    Your detailed and engaging response:
    """
    response = gen_response(context)

    # Append the current interaction to the chat history
    chat_history.append({'query': query, 'response': response})
    return response, chat_history
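

# ---------------------------------------------------------------------------
# Minimal usage sketch (an assumption, not part of the original pipeline).
# It presumes that 'combined_df_with_embeddings2.csv' has already been
# produced by the commented-out preprocessing steps above and that a valid
# GOOGLE_API_KEY is available in the .env file. The sample query below is
# purely illustrative.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    combined_df = load_data()
    chat_history = []

    # Hypothetical query used only to demonstrate the call pattern
    sample_query = "Find me a beachfront hotel in Bali with great reviews."
    answer, chat_history = rag_bot(sample_query, combined_df, chat_history)
    print(answer)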