Spaces:

taspol
/

PAN-SEA

Sleeping

App Files Files Community

taspol committed on Aug 14

Commit

7bc7ddb

1 Parent(s): 1e81c91

feat: add Dockerfile

Browse files

Files changed (9) hide show

Dockerfile +38 -0
app.py +78 -0
app/app.py +0 -69
data_importer.py +7 -2
interface.py +3 -0
plan_mock.json +0 -129
requirements.txt +38 -0
utils/llm_caller.py +210 -163
utils/youtube_extractor.py +2 -2

Dockerfile ADDED Viewed

	@@ -0,0 +1,38 @@

+FROM python:3.11-slim
+# Set working directory
+WORKDIR /app
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONPATH=/app
+# Install system dependencies (curl is needed by the HEALTHCHECK below);
+# --no-install-recommends keeps the image free of optional packages
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first for better caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create necessary directories
+RUN mkdir -p /app/data /app/logs
+# Expose port
+EXPOSE 8000
+# Health check: probes the GET /v1 route served by app.py
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/v1 || exit 1
+# Run the application.
+# The FastAPI instance is `app` in the top-level app.py (app/app.py was removed
+# in this commit), so the import string is "app:app", not "app.app:app".
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+from fastapi import FastAPI
+from interface import PlanRequest, PlanResponse, TripPlan , YoutubeLinkRequest, YoutubeLinkResponse, ChatRequest
+from data_importer import DataImporter
+from utils.llm_caller import LLMCaller
+import asyncio
+import time
+from datetime import datetime
+# ASGI application object that the route decorators below register against.
+app = FastAPI()
+# Module-level singletons, built once at import time and shared by all handlers.
+# NOTE(review): DataImporter and LLMCaller each construct their own
+# SentenceTransformer("BAAI/bge-m3"), so the embedding model is loaded twice.
+data_importer = DataImporter()
+agent = LLMCaller()
+@app.get("/v1")
+def greet_json():
+    """Return a static health/identity payload for the service.
+
+    The response carries the current UTC time (naive ISO-8601 string) and an
+    empty ``checks`` mapping; no downstream dependency is actually probed.
+    """
+    return {
+        "status": "healthy",
+        "timestamp": datetime.utcnow().isoformat(),
+        "service": "SealionAI Travel Planning Service",
+        "version": "1.0.0",
+        "checks": {},
+    }
+@app.post("/v1/generateTripPlan", response_model=PlanResponse)
+def generate_trip_plan(request: PlanRequest):
+    """Generate a trip plan for ``request`` through the RAG agent.
+
+    Always returns a ``PlanResponse``; failures are reported through
+    ``meta["status"]`` instead of an HTTP error status.
+    """
+    try:
+        # query_with_rag is a coroutine and this handler is synchronous, so
+        # run it to completion on a private event loop.
+        trip_plan = asyncio.run(agent.query_with_rag(request))
+        # query_with_rag does not raise on failure: it returns a PlanResponse
+        # whose meta["status"] is "error" or "json_parse_error". Keep that meta
+        # instead of unconditionally stamping "success" over it.
+        meta = dict(trip_plan.meta or {})
+        meta.setdefault("status", "success")
+        meta["timestamp"] = datetime.utcnow().isoformat()
+        return PlanResponse(tripOverview=trip_plan.tripOverview,
+                            query_params=request,
+                            retrieved_data=trip_plan.retrieved_data,
+                            trip_plan=trip_plan.trip_plan,
+                            meta=meta)
+    except Exception as e:
+        print(f"Error in generate_trip_plan: {e}")
+        # Return error response
+        return PlanResponse(
+            tripOverview=f"Error: {str(e)}",
+            query_params=request,
+            retrieved_data=[],
+            trip_plan=TripPlan(overview="Error occurred", total_estimated_cost=0.0, steps=[]),
+            meta={"status": "error", "error": str(e)}
+        )
+@app.post("/v1/addYoutubeLink", response_model=YoutubeLinkResponse)
+def add_youtube_link(request: YoutubeLinkRequest):
+    """Ingest the YouTube video identified by ``request.video_id``.
+
+    Returns a ``YoutubeLinkResponse`` whose ``video_url`` is the watch URL on
+    success and ``None`` when the importer raised.
+    """
+    try:
+        data_importer.insert_from_youtube(request.video_id)
+    except Exception as e:
+        # Log the cause instead of discarding it, as the other handlers do.
+        print(f"Error in add_youtube_link: {e}")
+        return YoutubeLinkResponse(
+            message="Failed to add YouTube link",
+            video_url=None
+        )
+    # NOTE(review): insert_from_youtube is annotated Optional[str]; if it
+    # signals failure by returning None rather than raising, this branch still
+    # reports success - confirm against the importer.
+    return YoutubeLinkResponse(
+        message="add successfully",
+        video_url=f"https://www.youtube.com/watch?v={request.video_id}"
+    )
+@app.post("/v1/searchSimilar", response_model=list[dict])
+def search_similar(request: YoutubeLinkRequest):
+    """Run a similarity search, using ``request.video_id`` as the query text.
+
+    Returns whatever ``data_importer.search_similar`` yields.
+
+    Raises:
+        HTTPException: status 500 with detail "Search failed" when the search
+            raises. The previous error path returned a dict, which cannot
+            satisfy ``response_model=list[dict]`` and so failed response
+            validation instead of producing a usable error body.
+    """
+    # Local import so this handler does not depend on the module import block.
+    from fastapi import HTTPException
+    try:
+        return data_importer.search_similar(query=request.video_id)
+    except Exception as e:
+        print(f"Error during search: {e}")
+        raise HTTPException(status_code=500, detail="Search failed") from e
+@app.post("/v1/basicChat", response_model=str)
+def basic_chat(request: ChatRequest):
+    """Forward the chat message to the agent and return its text reply."""
+    # basic_query is a coroutine; drive it to completion from this sync handler.
+    pending_reply = agent.basic_query(user_prompt=request.message)
+    return asyncio.run(pending_reply)

app/app.py DELETED Viewed

@@ -1,69 +0,0 @@
-from fastapi import FastAPI
-from interface import PlanRequest, PlanResponse, PlanStep, TransportInfo, TripPlan , YoutubeLinkRequest, YoutubeLinkResponse
-from data_importer import DataImporter
-import os
-import json
-app = FastAPI()
-data_importer = DataImporter()
-def load_mock_data(path: str = "plan_mock.json") -> dict:
-    """Load mock data from plan_mock.json"""
-    try:
-        file_path = os.path.join(os.path.dirname(__file__), path)
-        with open(file_path, 'r', encoding='utf-8') as file:
-            return json.load(file)
-    except FileNotFoundError:
-        # Return default mock data if file not found
-        print("Mock data file not found. Using default mock data.")
-        return {"error": "Invalid JSON format"}
-@app.get("/v1")
-def greet_json():
-    return {"Hello": "World!"}
-@app.post("/v1/generateTripPlan", response_model=PlanResponse)
-def generate_trip_plan(request: PlanRequest):
-    mock_trip_plan = load_mock_data()
-    print(mock_trip_plan)
-    return PlanResponse(
-        tripOverview="Sample trip overview.",
-        query_params=request,
-        retrieved_data=[],
-        trip_plan=TripPlan(
-            overview="Sample trip overview",
-            total_estimated_cost=1000.0,
-            steps=[PlanStep(
-            day=1,
-            title="Arrival in New York",
-            description="Arrive at JFK Airport and check-in at the hotel.",
-            transport=TransportInfo(
-                mode="Plane",
-                departure="Your hometown airport",
-                arrival="JFK Airport",
-                duration_minutes=300,
-                price=300.0,
-                details="Non-stop flight"
-            ),
-            map_coordinates={"lat": 40.6413, "lon": -73.7781},
-            images=["https://example.com/images/jfk_airport.jpg"],
-            tips=["Bring a valid ID", "Confirm your hotel reservation"]
-        )]),
-        meta={"status": "success"}
-    )
-# @app.post("/v1/addYoutubeLink", response_model=YoutubeLinkResponse)
-# def add_youtube_link(request: YoutubeLinkRequest):
-#     try:
-#         data_importer.insert_from_youtube(request.video_id)
-#     except Exception as e:
-#         return YoutubeLinkResponse(
-#             message="Failed to add YouTube link",
-#             video_url=None
-#         )
-#     return YoutubeLinkResponse(
-#         message="add successfully",
-#         video_url=f"https://www.youtube.com/watch?v={request.video_id}"
-#     )

data_importer.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import List, Dict, Optional, Union
 import uuid
 class DataImporter:
-    def __init__(self, qdrant_url: str = "http://localhost:6333", collection_name: str = "demo_bge_m3"):
         self.model = SentenceTransformer("BAAI/bge-m3")
         self.client = QdrantClient(url=qdrant_url)
         self.collection_name = collection_name
@@ -17,6 +17,11 @@ class DataImporter:
     def _create_collection(self):
         try:
             self.client.recreate_collection(
                 collection_name=self.collection_name,
                 vectors_config=VectorParams(size=1024, distance=Distance.COSINE)
@@ -67,7 +72,7 @@ class DataImporter:
     def insert_from_youtube(self, video_id: str, metadata: Optional[Dict] = None) -> Optional[str]:
         try:
             # Extract text from YouTube (assuming your YoutubeExtractor has this method)
-            text = self.youtube_extractor.extract_transcript(video_id)
             if text:
                 video_metadata = {"source": "youtube", "video_id": video_id}
                 if metadata:

 import uuid
 class DataImporter:
+    def __init__(self, qdrant_url: str = "https://qdrant.taspolsd.dev", collection_name: str = "demo_bge_m3"):
         self.model = SentenceTransformer("BAAI/bge-m3")
         self.client = QdrantClient(url=qdrant_url)
         self.collection_name = collection_name
     def _create_collection(self):
         try:
+            collections = self.client.get_collection(self.collection_name)
+            if collections:
+                print(f"Collection '{self.collection_name}' already exists.")
+                return
             self.client.recreate_collection(
                 collection_name=self.collection_name,
                 vectors_config=VectorParams(size=1024, distance=Distance.COSINE)
     def insert_from_youtube(self, video_id: str, metadata: Optional[Dict] = None) -> Optional[str]:
         try:
             # Extract text from YouTube (assuming your YoutubeExtractor has this method)
+            text = self.youtube_extractor.get_full_text(video_id)
             if text:
                 video_metadata = {"source": "youtube", "video_id": video_id}
                 if metadata:

interface.py CHANGED Viewed

@@ -65,3 +65,6 @@ class PlanResponse(BaseModel):
     retrieved_data: List[RetrievedItem]
     trip_plan: TripPlan
     meta: Dict[str, Any]

     retrieved_data: List[RetrievedItem]
     trip_plan: TripPlan
     meta: Dict[str, Any]
+# Request body for the /v1/basicChat endpoint.
+class ChatRequest(BaseModel):
+    message: str  # user text, passed to the agent as the user prompt

plan_mock.json DELETED Viewed

@@ -1,129 +0,0 @@
-{
-  "tripOverview": {
-    "summary": "Adventure trip from Bangkok to Chiang Mai with cultural exploration",
-    "regions": ["Thailand", "Chiang Mai"],
-    "destination": "Chiang Mai, Thailand",
-    "EstimatedCost": "Approximately 10,000 THB per person",
-    "durationDays": 5,
-    "purpose": "Adventure, cultural experience"
-  },
-  "milestones": [
-    "Bangkok Suvarnabhumi Airport",
-    "Chiang Mai International Airport",
-    "Doi Suthep-Pui National Park",
-    "Chiang Mai Old City",
-    "Elephant Nature Park",
-    "Chiang Rai Night Bazaar"
-  ],
-  "transportation": [
-    {
-      "mode": "Plane",
-      "from": "Bangkok Suvarnabhumi Airport",
-      "to": "Chiang Mai International Airport",
-      "schedule": "2025-09-15T08:00:00Z",
-      "price": "1500 THB per person"
-    },
-    {
-      "mode": "Songthaew (Red Truck)",
-      "from": "Chiang Mai International Airport",
-      "to": "Chiang Mai Old City",
-      "schedule": "2025-09-15T11:00:00Z",
-      "price": "40 THB per person"
-    }
-  ],
-  "accommodation": [
-    {
-      "type": "Guesthouse",
-      "location": "Chiang Mai Old City",
-      "contact": "053-211-111",
-      "notes": "Centrally located with air conditioning"
-    }
-  ],
-  "tripRoute": [
-    {
-      "day": 1,
-      "activities": [
-        "Arrival at Chiang Mai International Airport",
-        "Check-in at guesthouse",
-        "Explore Chiang Mai Old City",
-        "Visit Wat Phra That Doi Suthep"
-      ],
-      "walkingRoute": "Old City Moat",
-      "signage": "Follow city maps",
-      "suggestions": ["Wear comfortable shoes", "Try local street food"],
-      "precautions": ["Beware of traffic"]
-    },
-    {
-      "day": 2,
-      "activities": [
-        "Visit Elephant Nature Park",
-        "Participate in elephant conservation activities"
-      ],
-      "transport": {
-        "mode": "Van",
-        "from": "Chiang Mai Old City",
-        "to": "Elephant Nature Park",
-        "price": "800 THB per person",
-        "duration_minutes": 60
-      },
-      "suggestions": ["Wear long sleeves", "Bring sunscreen"],
-      "precautions": ["Follow park guidelines"]
-    },
-    {
-      "day": 3,
-      "activities": [
-        "Trekking in Doi Suthep-Pui National Park",
-        "Visit waterfalls and scenic viewpoints"
-      ],
-      "transport": {
-        "mode": "Songthaew (Red Truck)",
-        "from": "Chiang Mai Old City",
-        "to": "Doi Suthep-Pui National Park",
-        "price": "50 THB per person",
-        "duration_minutes": 45
-      },
-      "suggestions": ["Bring water and snacks", "Wear hiking boots"],
-      "precautions": ["Beware of slippery trails"]
-    },
-    {
-      "day": 4,
-      "activities": [
-        "Visit Chiang Rai Night Bazaar",
-        "Explore local markets and try street food"
-      ],
-      "transport": {
-        "mode": "Bus",
-        "from": "Chiang Mai Arcade Bus Station",
-        "to": "Chiang Rai Night Bazaar",
-        "price": "200 THB per person",
-        "duration_minutes": 180
-      },
-      "suggestions": ["Bring cash", "Try local delicacies"],
-      "precautions": ["Beware of pickpockets"]
-    },
-    {
-      "day": 5,
-      "activities": [
-        "Return to Bangkok",
-        "Departure from Chiang Mai International Airport"
-      ],
-      "transport": {
-        "mode": "Plane",
-        "from": "Chiang Mai International Airport",
-        "to": "Bangkok Suvarnabhumi Airport",
-        "price": "1500 THB per person",
-        "duration_minutes": 75
-      }
-    }
-  ],
-  "emergencyContacts": {
-    "localRangers": "053-211-111",
-    "hospital": "Chiang Mai Ram Hospital: 053-211-111",
-    "embassy": "Thai Embassy: 02-281-0141"
-  },
-  "tips": [
-    "Always carry a map or use a GPS app",
-    "Respect local customs and traditions",
-    "Stay hydrated and wear sunscreen"
-  ]
-}

requirements.txt ADDED Viewed

	@@ -0,0 +1,38 @@

+# Web Framework
+fastapi==0.104.1
+uvicorn[standard]==0.24.0
+# Environment and Configuration
+python-dotenv==1.0.0
+# AI/ML Libraries
+openai==1.3.7
+sentence-transformers==2.2.2
+torch==2.1.1
+transformers==4.35.2
+# Vector Database
+qdrant-client==1.6.9
+# YouTube Processing
+# NOTE(review): utils/youtube_extractor.py calls `ytt_api.fetch(video_id, languages=...)`
+# and reads `entry.text`; confirm the pinned release below provides that API.
+youtube-transcript-api==0.6.1
+# HTTP Client
+httpx==0.25.2
+# Data Processing
+pydantic==2.5.0
+typing-extensions==4.8.0
+# Standard-library modules (asyncio, json, os, time, datetime, uuid, re) ship
+# with Python and must NOT be listed as requirements: pip either fails to
+# resolve them or installs an unrelated PyPI package of the same name.
+# Optional: For better performance
+uvloop==0.19.0  # Unix only
+python-multipart==0.0.6

utils/llm_caller.py CHANGED Viewed

@@ -1,196 +1,243 @@
 import os
 import asyncio
 import httpx
 from typing import List, Optional, Dict, Any
 from dataclasses import dataclass
 from qdrant_client import QdrantClient
 SYSTEM_PROMPT = """You are a helpful travel assistant. Use the provided context to answer the user's question about travel destinations and places.
 If the context doesn't contain relevant information, say so politely and provide general advice if possible."""
 '''
 '''
-@dataclass
-class RetrievedItem:
-    place_id: str
-    place_name: str
-    description: Optional[str]
-    score: float
-    metadata: Dict[str, Any]
 class LLMCaller:
     def __init__(self):
         # Environment variables
-        self.qdrant_host = os.getenv("QDRANT_HOST", "localhost")
-        self.qdrant_api_key = os.getenv("QDRANT_API_KEY", None)
-        self.qdrant_collection = os.getenv("QDRANT_COLLECTION", "trip_places")
-        self.embedding_dim = int(os.getenv("EMBEDDING_DIM", "1024"))
-        self.top_k = int(os.getenv("TOP_K", "6"))
-        # LLM configuration
-        self.llm_api_url = os.getenv("LLM_API_URL", "https://api.openai.com/v1/chat/completions")
-        self.llm_api_key = os.getenv("LLM_API_KEY", "sk-REPLACE_ME")
-        # Initialize Qdrant client
         self.qdrant = QdrantClient(
-            host=self.qdrant_host,
-            api_key=self.qdrant_api_key
         )
-    async def call_llm(self, system_prompt: str, user_prompt: str, max_tokens: int = 512, model: str = "sea-lion-7b-instruct") -> str:
-        """
-        Call LLM with system and user prompts
-        Args:
-            system_prompt (str): System message for the LLM
-            user_prompt (str): User message/question
-            max_tokens (int): Maximum tokens to generate
-            model (str): Model to use
-        Returns:
-            str: LLM response text
-        """
-        headers = {
-            "Authorization": f"Bearer {self.llm_api_key}",
-            "Content-Type": "application/json",
-        }
-        payload = {
-            "model": model,
-            "messages": [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_prompt}
-            ],
-            "max_tokens": max_tokens,
-            "temperature": 0.7,
-        }
         try:
-            async with httpx.AsyncClient(timeout=30) as client:
-                resp = await client.post(self.llm_api_url, json=payload, headers=headers)
-                resp.raise_for_status()
-                data = resp.json()
-                # Handle OpenAI-like response format
-                if "choices" in data and len(data["choices"]) > 0:
-                    return data["choices"][0]["message"]["content"]
-                # Fallback for other formats
-                return data.get("text", "")
         except Exception as e:
             print(f"Error calling LLM: {e}")
             return f"Error: Unable to get LLM response - {str(e)}"
-    async def query_qdrant(self, query_embedding: List[float], top_k: Optional[int] = None, collection_name: Optional[str] = None) -> List[RetrievedItem]:
         """
-        Query Qdrant vector database
-        Args:
-            query_embedding (List[float]): Query vector embedding
-            top_k (int, optional): Number of results to return
-            collection_name (str, optional): Collection name to query
-        Returns:
-            List[RetrievedItem]: Retrieved items from Qdrant
         """
-        top_k = top_k or self.top_k
-        collection_name = collection_name or self.qdrant_collection
-        def _search():
-            try:
-                hits = self.qdrant.search(
-                    collection_name=collection_name,
-                    query_vector=query_embedding,
-                    limit=top_k,
-                    with_payload=True,
                 )
-                items: List[RetrievedItem] = []
-                for h in hits:
-                    payload = h.payload or {}
-                    items.append(RetrievedItem(
-                        place_id=str(h.id),
-                        place_name=payload.get("name") or payload.get("title") or "",
-                        description=payload.get("description") or payload.get("summary") or None,
-                        score=float(h.score) if h.score is not None else 0.0,
-                        metadata=payload,
-                    ))
-                return items
-            except Exception as e:
-                print(f"Error querying Qdrant: {e}")
-                return []
-        return await asyncio.to_thread(_search)
-    async def rag_query(self, query: str, query_embedding: List[float], system_prompt: Optional[str] = None) -> Dict[str, Any]:
-        # Retrieve relevant items from Qdrant
-        retrieved_items = await self.query_qdrant(query_embedding)
-        # Build context from retrieved items
-        context_parts = []
-        for item in retrieved_items:
-            context_parts.append(f"- {item.place_name}: {item.description or 'No description available'}")
-        context = "\n".join(context_parts) if context_parts else "No relevant information found."
-        # Default system prompt if none provided
-        if not system_prompt:
-            system_prompt = """You are a helpful travel assistant. Use the provided context to answer the user's question about travel destinations and places.
-                               If the context doesn't contain relevant information, say so politely and provide general advice if possible."""
-        # Create user prompt with context
-        user_prompt = f"""Context:
-{context}
-Question: {query}
-Please provide a helpful response based on the context above."""
-        # Get LLM response
-        llm_response = await self.call_llm(system_prompt, user_prompt)
-        return {
-            "answer": llm_response,
-            "retrieved_items": retrieved_items,
-            "context": context,
-            "query": query
-        }
-    def update_config(self, **kwargs):
-        """
-        Update configuration parameters
-        Args:
-            **kwargs: Configuration parameters to update
-        """
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
-            else:
-                print(f"Warning: Unknown configuration parameter: {key}")
-# Example usage
-if __name__ == "__main__":
-    async def main():
-        # Initialize LLM caller
-        llm_caller = LLMCaller()
-        # Example embedding (replace with actual embedding)
-        query_embedding = [0.1] * 1024  # Dummy embedding
-        # Perform RAG query
-        result = await llm_caller.rag_query(
-            query="What are the best places to visit in Thailand?",
-            query_embedding=query_embedding
-        )
-        print("Answer:", result["answer"])
-        print(f"Found {len(result['retrieved_items'])} relevant items")
-        # Direct LLM call
-        response = await llm_caller.call_llm(
-            system_prompt="You are a helpful assistant.",
-            user_prompt="What is the capital of Thailand?"
-        )
-        print("Direct LLM Response:", response)
-    asyncio.run(main())

 import os
 import asyncio
 import httpx
+from dotenv import load_dotenv
 from typing import List, Optional, Dict, Any
 from dataclasses import dataclass
 from qdrant_client import QdrantClient
+from openai import OpenAI
+from sentence_transformers import SentenceTransformer
+from interface import PlanResponse, TripPlan, PlanStep, TransportInfo, RetrievedItem, PlanRequest
+import json
+# Populate the process environment via python-dotenv before LLMCaller reads
+# its settings with os.getenv.
+load_dotenv()
+# Default system message; LLMCaller stores it as self.system_prompt and sends
+# it with every chat completion.
 SYSTEM_PROMPT = """You are a helpful travel assistant. Use the provided context to answer the user's question about travel destinations and places.
 If the context doesn't contain relevant information, say so politely and provide general advice if possible."""
+# The string literal below is a bare expression statement with no effect.
 '''
 '''
+# @dataclass
+# class RetrievedItem:
+#     place_id: str
+#     place_name: str
+#     description: Optional[str]
+#     score: float
+#     metadata: Dict[str, Any]
 class LLMCaller:
+    """Travel-planning agent combining vector retrieval with an LLM.
+
+    ``__init__`` creates three collaborators: an OpenAI-compatible chat
+    client, a Qdrant client for similarity search, and a SentenceTransformer
+    used to embed queries.
+    """
     def __init__(self):
         # Environment variables
+        # Chat client configured from SEALION_API / SEALION_BASE_URL.
+        self.client = OpenAI(
+                                api_key=os.getenv("SEALION_API"),
+                                base_url=os.getenv("SEALION_BASE_URL"),
+                            )
+        # Default number of search hits when the request does not set top_k.
+        self.top_k = 3
+        # NOTE(review): no fallback when QDRANT_HOST is unset, whereas
+        # DataImporter defaults to a fixed URL - confirm both target the same
+        # Qdrant instance.
+        self.qdrant_host = os.getenv("QDRANT_HOST")
         self.qdrant = QdrantClient(
+            url=self.qdrant_host,
         )
+        self.system_prompt = SYSTEM_PROMPT
+        self.embedding_model = SentenceTransformer("BAAI/bge-m3")
+        self.collection_name = "demo_bge_m3"
+    async def basic_query(self, user_prompt: str, max_tokens: int = 1024, model: str = "aisingapore/Gemma-SEA-LION-v3-9B-IT") -> str:
+        """Send ``user_prompt`` (with the stored system prompt) to the chat model.
+
+        Args:
+            user_prompt: content of the user message.
+            max_tokens: accepted for interface compatibility.
+                NOTE(review): it is not forwarded to the API call; wiring it
+                in would change live behaviour (query_with_rag passes 12048),
+                so confirm the model's limit before doing so.
+            model: model identifier passed to the completion call.
+
+        Returns:
+            The assistant text, or a string starting with "Error:" when the
+            call fails. Exceptions are logged and never propagated.
+        """
         try:
+            completion = self.client.chat.completions.create(
+                model=model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": self.system_prompt
+                    },
+                    {
+                        "role": "user",
+                        "content": user_prompt
+                    }
+                ]
+            )
+            # The response schema allows content to be None; normalise it so
+            # the declared ``str`` return type always holds.
+            return completion.choices[0].message.content or ""
         except Exception as e:
             print(f"Error calling LLM: {e}")
             return f"Error: Unable to get LLM response - {str(e)}"
+    async def query_with_rag(self, plan_request: PlanRequest, collection_name: Optional[str] = None) -> 'PlanResponse':
         """
+        Perform RAG query using PlanRequest, embed query, search Qdrant, and generate complete PlanResponse via LLM
+
+        Args:
+            plan_request: trip parameters; also echoed back as ``query_params``.
+            collection_name: Qdrant collection to search; defaults to
+                ``self.collection_name``.
+
+        Returns:
+            A PlanResponse. This method does not raise: ``meta["status"]`` is
+            "success", "json_parse_error" (LLM output was not valid JSON; the
+            raw text is returned as ``tripOverview``) or "error".
         """
+        print(plan_request)
+        try:
+            # 1. Create query string from PlanRequest
+            query_text = f"Trip from {plan_request.start_place} to {plan_request.destination_place}"
+            if plan_request.trip_context:
+                query_text += f" for {plan_request.trip_context}"
+            if plan_request.trip_duration_days:
+                query_text += f" for {plan_request.trip_duration_days} days"
+            if plan_request.trip_price:
+                query_text += f" with budget {plan_request.trip_price}"
+            # 2. Generate embedding for the query
+            query_embedding = self.embedding_model.encode(query_text, normalize_embeddings=True).tolist()
+            # 3. Search Qdrant for similar content
+            collection = collection_name or self.collection_name
+            top_k = plan_request.top_k or self.top_k
+            search_results = self.qdrant.search(
+                collection_name=collection,
+                query_vector=query_embedding,
+                limit=top_k,
+                with_payload=True
+            )
+            # 4. Convert search results to RetrievedItem format
+            retrieved_data = []
+            context_text = ""
+            for result in search_results:
+                # A point stored without a payload comes back with
+                # payload=None; treat it as empty instead of crashing.
+                payload = result.payload or {}
+                retrieved_item = RetrievedItem(
+                    place_id=str(result.id),
+                    place_name=payload.get("place_name", "Unknown"),
+                    description=payload.get("text", ""),
+                    score=result.score,
+                    metadata=payload
                 )
+                retrieved_data.append(retrieved_item)
+                context_text += f"\n{payload.get('text', '')}"
+            # 5. Create detailed prompt for LLM to generate structured response
+            llm_prompt = f"""
+            You are a travel planning assistant. Based on the trip request and travel context provided, generate a comprehensive trip plan in the exact JSON format specified below.
+            Trip Request:
+            - From: {plan_request.start_place}
+            - To: {plan_request.destination_place}
+            - Duration: {plan_request.trip_duration_days} days
+            - Budget: {plan_request.trip_price}
+            - Context: {plan_request.trip_context}
+            - Group Size: {plan_request.group_size}
+            - Preferences: {plan_request.preferences}
+            Relevant Travel Context:
+            {context_text}
+            Generate a response in this EXACT JSON format (no additional text before or after):
+            {{
+                "tripOverview": "A comprehensive 2-3 paragraph overview of the entire trip",
+                "trip_plan": {{
+                    "overview": "Brief summary of the trip plan",
+                    "total_estimated_cost": estimated_total_cost_as_number,
+                    "steps": [
+                        {{
+                            "day": 1,
+                            "title": "Day 1 title",
+                            "description": "Detailed description of day 1 activities",
+                            "transport": {{
+                                "mode": "transportation method",
+                                "departure": "departure location",
+                                "arrival": "arrival location",
+                                "duration_minutes": estimated_duration_in_minutes,
+                                "price": estimated_price,
+                                "details": "additional transport details"
+                            }},
+                            "map_coordinates": {{"lat": latitude_number, "lon": longitude_number}},
+                            "images": ["url1", "url2"],
+                            "tips": ["tip1", "tip2", "tip3"]
+                        }}
+                    ]
+                }}
+            }}
+            Don't Explain or add any additional text outside the JSON format.
+            Ensure the JSON is valid and well-structured.
+            Create {plan_request.trip_duration_days or 1} days of detailed activities. Include realistic prices, coordinates, and practical tips. Make it specific to the destinations and context provided.
+            """
+            # 6. Call LLM to generate structured trip plan
+            llm_response = await self.basic_query(user_prompt=llm_prompt, max_tokens=12048)
+            # 7. Parse LLM response as JSON
+            try:
+                # Clean the response and parse JSON
+                json_str = llm_response.strip()
+                if json_str.startswith("```json"):
+                    json_str = json_str[7:]
+                elif json_str.startswith("```"):
+                    # Code fence without a language tag.
+                    json_str = json_str[3:]
+                if json_str.endswith("```"):
+                    json_str = json_str[:-3]
+                llm_data = json.loads(json_str)
+                # Convert to PlanResponse structure
+                # `or` also covers keys that are present but null in the JSON.
+                trip_plan_data = llm_data.get("trip_plan") or {}
+                steps_data = trip_plan_data.get("steps") or []
+                # Convert steps to PlanStep objects
+                plan_steps = []
+                for step in steps_data:
+                    transport_data = step.get("transport") or {}
+                    transport = TransportInfo(
+                        mode=transport_data.get("mode"),
+                        departure=transport_data.get("departure"),
+                        arrival=transport_data.get("arrival"),
+                        duration_minutes=transport_data.get("duration_minutes"),
+                        price=transport_data.get("price"),
+                        details=transport_data.get("details")
+                    )
+                    plan_step = PlanStep(
+                        day=step.get("day"),
+                        title=step.get("title"),
+                        description=step.get("description"),
+                        transport=transport,
+                        map_coordinates=step.get("map_coordinates", {}),
+                        images=step.get("images", []),
+                        tips=step.get("tips", [])
+                    )
+                    plan_steps.append(plan_step)
+                trip_plan = TripPlan(
+                    overview=trip_plan_data.get("overview", ""),
+                    total_estimated_cost=trip_plan_data.get("total_estimated_cost"),
+                    steps=plan_steps
+                )
+                return PlanResponse(
+                    tripOverview=llm_data.get("tripOverview", ""),
+                    query_params=plan_request,
+                    retrieved_data=retrieved_data,
+                    trip_plan=trip_plan,
+                    meta={
+                        "status": "success",
+                        "query_text": query_text,
+                        "results_count": len(retrieved_data)
+                    }
+                )
+            except json.JSONDecodeError as e:
+                print(f"Error parsing LLM JSON response: {e}")
+                print(f"LLM Response: {llm_response}")
+                # Fallback: create basic response with LLM text
+                return PlanResponse(
+                    tripOverview=llm_response,
+                    query_params=plan_request,
+                    retrieved_data=retrieved_data,
+                    trip_plan=TripPlan(
+                        overview="Generated plan (parsing error)",
+                        total_estimated_cost=plan_request.trip_price,
+                        steps=[]
+                    ),
+                    meta={"status": "json_parse_error", "error": str(e)}
+                )
+        except Exception as e:
+            # Anything else (embedding, search, model validation) becomes an
+            # "error" response rather than an exception.
+            print(f"Error in RAG query: {e}")
+            return PlanResponse(
+                tripOverview=f"Error generating trip plan: {str(e)}",
+                query_params=plan_request,
+                retrieved_data=[],
+                trip_plan=TripPlan(overview="Error occurred", total_estimated_cost=0.0, steps=[]),
+                meta={"status": "error", "error": str(e)}
+            )

utils/youtube_extractor.py CHANGED Viewed

@@ -7,7 +7,7 @@ class YoutubeExtractor:
     def extract_transcript(self, video_id: str) -> Optional[List[Dict]]:
         try:
-            transcript = self.ytt_api.fetch(video_id)
             return transcript
         except Exception as e:
             print(f"An error occurred: {e}")
@@ -15,7 +15,7 @@ class YoutubeExtractor:
     def get_text_only(self, video_id: str) -> Optional[List[str]]:
         transcript = self.extract_transcript(video_id)
         if transcript:
-            return [entry['text'] for entry in transcript]
         return None
     def get_full_text(self, video_id: str) -> Optional[str]:

     def extract_transcript(self, video_id: str) -> Optional[List[Dict]]:
         try:
+            transcript = self.ytt_api.fetch(video_id,languages=['en', 'th'])
             return transcript
         except Exception as e:
             print(f"An error occurred: {e}")
     def get_text_only(self, video_id: str) -> Optional[List[str]]:
         transcript = self.extract_transcript(video_id)
         if transcript:
+            return [entry.text for entry in transcript]
         return None
     def get_full_text(self, video_id: str) -> Optional[str]: