{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Berhasil terkoneksi ke MySQL Server\n", "Database 'tourism_destination' berhasil dibuat!\n", "Koneksi ke MySQL ditutup\n" ] } ], "source": [ "import mysql.connector\n", "from mysql.connector import Error\n", "\n", "# Fungsi untuk membuat koneksi ke MySQL dan membuat database\n", "def create_database(host_name, user_name, user_password, db_name):\n", " try:\n", " # Koneksi ke server MySQL\n", " connection = mysql.connector.connect(\n", " host=host_name,\n", " user=user_name,\n", " password=user_password\n", " )\n", " \n", " if connection.is_connected():\n", " print(\"Berhasil terkoneksi ke MySQL Server\")\n", " cursor = connection.cursor()\n", " # Membuat database baru\n", " cursor.execute(f\"CREATE DATABASE {db_name}\")\n", " print(f\"Database '{db_name}' berhasil dibuat!\")\n", " \n", " except Error as e:\n", " print(f\"Error: '{e}' terjadi\")\n", " \n", " finally:\n", " # Menutup koneksi\n", " if connection.is_connected():\n", " cursor.close()\n", " connection.close()\n", " print(\"Koneksi ke MySQL ditutup\")\n", "\n", "# Contoh penggunaan\n", "host = \"localhost\"\n", "user = \"root\"\n", "password = \"admin123\"\n", "database_name = \"tourism_destination\"\n", "\n", "create_database(host, user, password, database_name)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Berhasil terkoneksi ke database 'tourism_destination'\n", "Tabel 'places' berhasil dibuat!\n", "Koneksi ke MySQL ditutup\n" ] } ], "source": [ "def create_table(host_name, user_name, user_password, db_name):\n", " try:\n", " # Koneksi ke MySQL dan pilih database\n", " connection = mysql.connector.connect(\n", " host=host_name,\n", " user=user_name,\n", " password=user_password,\n", " database=db_name\n", " )\n", " \n", " if connection.is_connected():\n", " print(f\"Berhasil terkoneksi ke database '{db_name}'\")\n", " cursor = connection.cursor()\n", " \n", " # Membuat tabel dengan kolom sesuai format yang diberikan\n", " create_table_query = \"\"\"\n", " CREATE TABLE places (\n", " Place_Id INT AUTO_INCREMENT PRIMARY KEY,\n", " Place_Name VARCHAR(255) NOT NULL,\n", " Description TEXT,\n", " Category VARCHAR(100),\n", " City VARCHAR(100),\n", " Price DECIMAL(10, 2), \n", " Rating FLOAT \n", " );\n", " \"\"\"\n", " cursor.execute(create_table_query)\n", " print(\"Tabel 'places' berhasil dibuat!\")\n", " \n", " except Error as e:\n", " print(f\"Error: '{e}' terjadi\")\n", " \n", " finally:\n", " # Menutup koneksi\n", " if connection.is_connected():\n", " cursor.close()\n", " connection.close()\n", " print(\"Koneksi ke MySQL ditutup\")\n", "\n", "# Contoh penggunaan\n", "host = \"localhost\"\n", "user = \"root\"\n", "password = \"admin123\"\n", "database_name = \"tourism_destination\"\n", "\n", "create_table(host, user, password, database_name)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Place_IdPlace_NameDescriptionCategoryCityPriceRatingTime_MinutesCoordinateLatLongUnnamed: 11Unnamed: 12
01Monumen NasionalMonumen Nasional atau yang populer disingkat d...BudayaJakarta200004.615.0{'lat': -6.1753924, 'lng': 106.8271528}-6.175392106.827153NaN1
12Kota TuaKota tua di Jakarta, yang juga bernama Kota Tu...BudayaJakarta04.690.0{'lat': -6.137644799999999, 'lng': 106.8171245}-6.137645106.817125NaN2
23Dunia FantasiDunia Fantasi atau disebut juga Dufan adalah t...Taman HiburanJakarta2700004.6360.0{'lat': -6.125312399999999, 'lng': 106.8335377}-6.125312106.833538NaN3
34Taman Mini Indonesia Indah (TMII)Taman Mini Indonesia Indah merupakan suatu kaw...Taman HiburanJakarta100004.5NaN{'lat': -6.302445899999999, 'lng': 106.8951559}-6.302446106.895156NaN4
45Atlantis Water AdventureAtlantis Water Adventure atau dikenal dengan A...Taman HiburanJakarta940004.560.0{'lat': -6.12419, 'lng': 106.839134}-6.124190106.839134NaN5
\n", "
" ], "text/plain": [ " Place_Id Place_Name \\\n", "0 1 Monumen Nasional \n", "1 2 Kota Tua \n", "2 3 Dunia Fantasi \n", "3 4 Taman Mini Indonesia Indah (TMII) \n", "4 5 Atlantis Water Adventure \n", "\n", " Description Category City \\\n", "0 Monumen Nasional atau yang populer disingkat d... Budaya Jakarta \n", "1 Kota tua di Jakarta, yang juga bernama Kota Tu... Budaya Jakarta \n", "2 Dunia Fantasi atau disebut juga Dufan adalah t... Taman Hiburan Jakarta \n", "3 Taman Mini Indonesia Indah merupakan suatu kaw... Taman Hiburan Jakarta \n", "4 Atlantis Water Adventure atau dikenal dengan A... Taman Hiburan Jakarta \n", "\n", " Price Rating Time_Minutes \\\n", "0 20000 4.6 15.0 \n", "1 0 4.6 90.0 \n", "2 270000 4.6 360.0 \n", "3 10000 4.5 NaN \n", "4 94000 4.5 60.0 \n", "\n", " Coordinate Lat Long \\\n", "0 {'lat': -6.1753924, 'lng': 106.8271528} -6.175392 106.827153 \n", "1 {'lat': -6.137644799999999, 'lng': 106.8171245} -6.137645 106.817125 \n", "2 {'lat': -6.125312399999999, 'lng': 106.8335377} -6.125312 106.833538 \n", "3 {'lat': -6.302445899999999, 'lng': 106.8951559} -6.302446 106.895156 \n", "4 {'lat': -6.12419, 'lng': 106.839134} -6.124190 106.839134 \n", "\n", " Unnamed: 11 Unnamed: 12 \n", "0 NaN 1 \n", "1 NaN 2 \n", "2 NaN 3 \n", "3 NaN 4 \n", "4 NaN 5 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.read_csv(r'dataset_recommendation_tourism\\tourism_with_id.csv')\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',\n", " 'Rating', 'Time_Minutes', 'Coordinate', 'Lat', 'Long', 'Unnamed: 11',\n", " 'Unnamed: 12'],\n", " dtype='object')" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.columns" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Place_Id', 'Place_Name', 'Description', 'Category', 'City', 'Price',\n", " 'Rating'],\n", " dtype='object')" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = data.drop(['Time_Minutes', 'Coordinate',\n", " 'Lat', 'Long', 'Unnamed: 11',\n", " 'Unnamed: 12'], axis=1)\n", "data.columns" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "data.to_csv('tourism_place.csv')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 437 entries, 0 to 436\n", "Data columns (total 7 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Place_Id 437 non-null int64 \n", " 1 Place_Name 437 non-null object \n", " 2 Description 437 non-null object \n", " 3 Category 437 non-null object \n", " 4 City 437 non-null object \n", " 5 Price 437 non-null int64 \n", " 6 Rating 437 non-null float64\n", "dtypes: float64(1), int64(2), object(4)\n", "memory usage: 24.0+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Place_IdPlace_NameDescriptionCategoryCityPriceRating
01Monumen NasionalMonumen Nasional atau yang populer disingkat d...BudayaJakarta200004.6
12Kota TuaKota tua di Jakarta, yang juga bernama Kota Tu...BudayaJakarta04.6
23Dunia FantasiDunia Fantasi atau disebut juga Dufan adalah t...Taman HiburanJakarta2700004.6
34Taman Mini Indonesia Indah (TMII)Taman Mini Indonesia Indah merupakan suatu kaw...Taman HiburanJakarta100004.5
45Atlantis Water AdventureAtlantis Water Adventure atau dikenal dengan A...Taman HiburanJakarta940004.5
\n", "
" ], "text/plain": [ " Place_Id Place_Name \\\n", "0 1 Monumen Nasional \n", "1 2 Kota Tua \n", "2 3 Dunia Fantasi \n", "3 4 Taman Mini Indonesia Indah (TMII) \n", "4 5 Atlantis Water Adventure \n", "\n", " Description Category City \\\n", "0 Monumen Nasional atau yang populer disingkat d... Budaya Jakarta \n", "1 Kota tua di Jakarta, yang juga bernama Kota Tu... Budaya Jakarta \n", "2 Dunia Fantasi atau disebut juga Dufan adalah t... Taman Hiburan Jakarta \n", "3 Taman Mini Indonesia Indah merupakan suatu kaw... Taman Hiburan Jakarta \n", "4 Atlantis Water Adventure atau dikenal dengan A... Taman Hiburan Jakarta \n", "\n", " Price Rating \n", "0 20000 4.6 \n", "1 0 4.6 \n", "2 270000 4.6 \n", "3 10000 4.5 \n", "4 94000 4.5 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\sentence_transformers\\cross_encoder\\CrossEncoder.py:11: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from tqdm.autonotebook import tqdm, trange\n" ] } ], "source": [ "import mysql.connector\n", "from mysql.connector import Error\n", "import ollama\n", "from sentence_transformers import SentenceTransformer\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "from tqdm import tqdm\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "def connect_to_database():\n", " try:\n", " connection = mysql.connector.connect(\n", " host=\"localhost\",\n", " user=\"root\",\n", " password=\"admin123\",\n", " database=\"tourism_destination\"\n", " )\n", " return connection\n", " except Error as e:\n", " print(f\"Error: '{e}'\")\n", " return None\n", " \n", " # Function to check if a column exists, and add it if necessary\n", "def add_embedding_column_if_not_exists(cursor):\n", " # Check if the 'Embedding' column exists\n", " cursor.execute(\"SHOW COLUMNS FROM places LIKE 'Embedding'\")\n", " result = cursor.fetchone()\n", " \n", " # If the 'Embedding' column does not exist, add it\n", " if not result:\n", " print(\"Adding 'Embedding' column to the table...\")\n", " cursor.execute(\"ALTER TABLE places ADD COLUMN Embedding TEXT\")\n", " print(\"'Embedding' column added.\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Adding 'Embedding' column to the table...\n" ] } ], "source": [ "connection = connect_to_database()\n", "cursor = connection.cursor()\n", "add_embedding_column_if_not_exists(cursor)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n" ] } ], "source": [ "# Koneksi ke MySQL\n", "def connect_to_database():\n", " try:\n", " connection = mysql.connector.connect(\n", " host=\"localhost\",\n", " user=\"root\",\n", " password=\"admin123\",\n", " database=\"tourism_destination\"\n", " )\n", " return connection\n", " except Error as e:\n", " print(f\"Error: '{e}'\")\n", " return None\n", "\n", "\n", "\n", "# Compute and store embeddings\n", "def compute_and_store_embeddings():\n", " model = SentenceTransformer('paraphrase-MiniLM-L6-v2') \n", "\n", " # Connect to the database\n", " connection = connect_to_database()\n", " if connection is None:\n", " return\n", " \n", " cursor = connection.cursor(dictionary=True)\n", " \n", " # Select all places from the database\n", " cursor.execute(\"SELECT Place_Id, Place_Name, Category, Description, City FROM places\")\n", " places = cursor.fetchall()\n", " \n", " for place in places:\n", " # Combine PlaceName, Category, Description, and City into one string\n", " text = f\"{place['Place_Name']} {place['Category']} {place['Description']} {place['City']}\"\n", " \n", " # Generate embedding for the combined text\n", " embedding = model.encode(text)\n", " \n", " # Convert embedding to a string format to store in the database\n", " embedding_str = ','.join([str(x) for x in embedding])\n", " \n", " # Update the place in the database with the embedding\n", " cursor.execute(\n", " \"UPDATE places SET Embedding = %s WHERE Place_Id = %s\", \n", " (embedding_str, place['Place_Id'])\n", " )\n", " \n", " # Commit the changes and close the connection\n", " connection.commit()\n", " cursor.close()\n", " connection.close()\n", "\n", "# Run the function to compute and store embeddings\n", "compute_and_store_embeddings()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Top 5 Ranked Destinations:\n", "\n", "Pulau Semak Daun (Rating: 4.0, Similarity Score: 0.6861)\n", "Wisata Batu Kuda (Rating: 4.4, Similarity Score: 0.6839)\n", "Gedung Agung Yogyakarta (Rating: 4.6, Similarity Score: 0.6727)\n", "Taman Sungai Mudal (Rating: 4.6, Similarity Score: 0.6595)\n", "Grand Maerakaca (Rating: 4.4, Similarity Score: 0.6581)\n" ] } ], "source": [ "# Koneksi ke MySQL\n", "def query_database():\n", " try:\n", " connection = mysql.connector.connect(\n", " host=\"localhost\",\n", " user=\"root\",\n", " password=\"admin123\",\n", " database=\"tourism_destination\"\n", " )\n", "\n", " if connection.is_connected():\n", " cursor = connection.cursor(dictionary=True)\n", " sql = \"SELECT * FROM places\"\n", " cursor.execute(sql)\n", " results = cursor.fetchall()\n", " return results\n", "\n", " except Error as e:\n", " print(f\"Error: '{e}'\")\n", " \n", " finally:\n", " if connection.is_connected():\n", " cursor.close()\n", " connection.close()\n", "\n", "# Get embedding from the database and calculate cosine similarity\n", "def get_similar_places(user_embedding, db_results):\n", " similarities = []\n", " \n", " for place in db_results:\n", " embedding_str = place['Embedding'] # Assuming embeddings are stored as comma-separated strings in the database\n", " embedding = np.array([float(x) for x in embedding_str.split(',')]) # Convert the string back to a numpy array\n", " \n", " # Compute cosine similarity\n", " similarity = cosine_similarity([user_embedding], [embedding])[0][0]\n", " similarities.append((place, similarity))\n", " \n", " # Sort results based on similarity and then by rating\n", " ranked_results = sorted(similarities, key=lambda x: (x[1], x[0]['Rating']), reverse=True)\n", " \n", " # Return top 5 places\n", " return ranked_results[:5]\n", "\n", "# Ollama - Generate possible places (Retrieval Augmented Generation)\n", "def generate_rag_result(user_query):\n", " prompt = f\"User Query: {user_query}\\n\\nPlease list 10 potential destinations based on user query:\"\n", " \n", " print(\"\\nGenerating results using Ollama (RAG)...\\n\")\n", " with tqdm(total=10, desc=\"Processing RAG\") as pbar:\n", " response = ollama.generate(model=\"llama3.1\", prompt=prompt)\n", " pbar.update(5)\n", " \n", " # Process the response (assuming response structure is consistent)\n", " print(\"Full response:\", response)\n", " return response # For now, we don't need to extract specific places, as similarity search will handle that\n", "\n", "# Main function to find the top 5 destinations\n", "def get_top_5_destinations(user_query):\n", " # Step 1: Generate embedding for user query\n", " model = SentenceTransformer('paraphrase-MiniLM-L6-v2')\n", " user_embedding = model.encode(user_query)\n", " \n", " # Step 2: Fetch all places from the database\n", " db_results = query_database()\n", " if not db_results or len(db_results) == 0:\n", " print(\"No data returned from database.\")\n", " return\n", " \n", " # Step 3: Find the most similar places\n", " top_5_places = get_similar_places(user_embedding, db_results)\n", " \n", " # Step 4: Display top 5 destinations\n", " print(\"\\nTop 5 Ranked Destinations:\\n\")\n", " for place, score in top_5_places:\n", " print(f\"{place['Place_Name']} (Rating: {place['Rating']}, Similarity Score: {score:.4f})\")\n", "\n", "# Example user query\n", "user_query = \"Saya ingin ke Jogjakarta dan saya suka dengan pemandangan alam. kemana saya harus pergi?\"\n", "get_top_5_destinations(user_query)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# prompt= \"do u know about LLM?\"\n", "# response = ollama.generate(model=\"llama3.1\", prompt=prompt)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# response['response'].strip().split('\\n')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "d:\\Data Science\\HACKATHON\\GEN AI LLAMA HACKTIV8\\llama_venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Top 5 Ranked Destinations:\n", "\n", "Pulau Semak Daun (Rating: 4.0, Similarity Score: 0.6407)\n", "Jembatan Merah (Rating: 4.5, Similarity Score: 0.6401)\n", "Pasar Beringharjo (Rating: 4.5, Similarity Score: 0.6331)\n", "Gereja Perawan Maria Tak Berdosa Surabaya (Rating: 4.8, Similarity Score: 0.6286)\n", "Perpustakaan Nasional (Rating: 4.7, Similarity Score: 0.6256)\n" ] } ], "source": [ "# Example user query\n", "user_query = \"Saya ingin ke Surabaya dan ingin berbelanja. kemana saya harus pergi?\"\n", "get_top_5_destinations(user_query)" ] } ], "metadata": { "kernelspec": { "display_name": "llama_venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }