Spaces:

abdullafahem
/

trip_planner

Sleeping

App Files Files Community

Abdulla Fahem committed 22 days ago

Commit

a86a6db

•

1 Parent(s): 23d2d4b

Add application file

Browse files

Files changed (1) hide show

app.py +68 -304

app.py CHANGED Viewed

@@ -19,23 +19,14 @@ torch.manual_seed(42)
 random.seed(42)
 # Environment setup
-os.environ['KMP_DUPLICATE_LIB_OK']='TRUE'
 class TravelDataset(Dataset):
     def __init__(self, data, tokenizer, max_length=512):
-        """
-        Initialize the dataset for travel planning
-        Parameters:
-        - data: DataFrame containing travel planning data
-        - tokenizer: Tokenizer for encoding input and output
-        - max_length: Maximum sequence length
-        """
         self.tokenizer = tokenizer
         self.data = data
         self.max_length = max_length
-        # Print dataset information
         print(f"Dataset loaded with {len(data)} samples")
         print("Columns:", list(data.columns))
@@ -43,18 +34,12 @@ class TravelDataset(Dataset):
         return len(self.data)
     def __getitem__(self, idx):
-        """
-        Prepare an individual training sample
-        Returns a dictionary with input_ids, attention_mask, and labels
-        """
         row = self.data.iloc[idx]
-        # Prepare input text
-        input_text = self.format_input_text(row)
-        # Prepare target text (travel plan)
-        target_text = row['target']
         # Tokenize inputs
         input_encodings = self.tokenizer(
@@ -79,160 +64,100 @@ class TravelDataset(Dataset):
             'attention_mask': input_encodings['attention_mask'].squeeze(),
             'labels': target_encodings['input_ids'].squeeze()
         }
-    @staticmethod
-    def format_input_text(row):
-        """
-        Format input text for the model
-        This method creates a prompt that the model will use to generate a travel plan
-        """
-        # Format the input text based on available columns
-        destination = row.get('dest', 'Unknown')
-        days = row.get('days', 3)
-        budget = row.get('budget', 'Moderate')
-        interests = row.get('interests', 'Culture, Food')
-        return f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {interests}"
 def load_dataset():
     """
-    Load the travel planning dataset from HuggingFace
-    Returns:
-    - pandas DataFrame with the dataset
     """
     try:
-        # Load dataset from CSV
         data = pd.read_csv("hf://datasets/osunlp/TravelPlanner/train.csv")
-        # Basic data validation
-        required_columns = ['dest', 'days', 'budget', 'interests', 'target']
         for col in required_columns:
             if col not in data.columns:
                 raise ValueError(f"Missing required column: {col}")
-        # Print dataset info
-        print("Dataset successfully loaded")
-        print(f"Total samples: {len(data)}")
-        print("Columns:", list(data.columns))
         return data
     except Exception as e:
         print(f"Error loading dataset: {e}")
         sys.exit(1)
 def train_model():
-    """
-    Train the T5 model for travel planning
-    Returns:
-    - Trained model
-    - Tokenizer
-    """
     try:
         # Load dataset
         data = load_dataset()
         # Initialize model and tokenizer
         print("Initializing T5 model and tokenizer...")
         tokenizer = T5Tokenizer.from_pretrained('t5-base', legacy=False)
         model = T5ForConditionalGeneration.from_pretrained('t5-base')
-        # Split data into training and validation sets
         train_size = int(0.8 * len(data))
         train_data = data[:train_size]
         val_data = data[train_size:]
-        print(f"Training set size: {len(train_data)}")
-        print(f"Validation set size: {len(val_data)}")
-        # Create datasets
         train_dataset = TravelDataset(train_data, tokenizer)
         val_dataset = TravelDataset(val_data, tokenizer)
-        # Training arguments
         training_args = TrainingArguments(
-            output_dir=f"./travel_planner_model_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
             num_train_epochs=3,
             per_device_train_batch_size=4,
             per_device_eval_batch_size=4,
-            warmup_steps=500,
-            weight_decay=0.01,
-            logging_dir="./logs",
-            logging_steps=10,
             evaluation_strategy="steps",
             eval_steps=50,
             save_steps=100,
             load_best_model_at_end=True,
         )
-        # Data collator
         data_collator = DataCollatorForSeq2Seq(
             tokenizer=tokenizer,
             model=model,
             padding=True
         )
-        # Initialize trainer
         trainer = Trainer(
             model=model,
             args=training_args,
             train_dataset=train_dataset,
             eval_dataset=val_dataset,
-            data_collator=data_collator,
         )
-        # Train the model
-        print("Starting model training...")
         trainer.train()
-        # Save the model and tokenizer
-        model_path = "./trained_travel_planner"
-        model.save_pretrained(model_path)
-        tokenizer.save_pretrained(model_path)
-        print("Model training completed and saved!")
         return model, tokenizer
     except Exception as e:
-        print(f"Error during model training: {str(e)}")
         return None, None
-def generate_travel_plan(destination, days, interests, budget, model, tokenizer):
     """
-    Generate a travel plan using the trained model
-    Parameters:
-    - destination: Travel destination
-    - days: Trip duration
-    - interests: User's interests
-    - budget: Trip budget level
-    - model: Trained T5 model
-    - tokenizer: Model tokenizer
-    Returns:
-    - Generated travel plan
     """
     try:
-        # Format input prompt
-        prompt = f"Plan a trip to {destination} for {days} days with a {budget} budget. Include activities related to: {', '.join(interests)}"
-        # Tokenize input
         inputs = tokenizer(
-            prompt,
             return_tensors="pt",
             max_length=512,
             padding="max_length",
             truncation=True
         )
-        # Move to GPU if available
         if torch.cuda.is_available():
             inputs = {k: v.cuda() for k, v in inputs.items()}
             model = model.cuda()
-        # Generate output
         outputs = model.generate(
             **inputs,
             max_length=512,
@@ -240,14 +165,10 @@ def generate_travel_plan(destination, days, interests, budget, model, tokenizer)
             no_repeat_ngram_size=3,
             num_return_sequences=1
         )
-        # Decode and return the travel plan
-        travel_plan = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return travel_plan
     except Exception as e:
-        print(f"Error generating travel plan: {e}")
-        return "Could not generate travel plan."
 def main():
     st.set_page_config(
@@ -255,201 +176,44 @@ def main():
         page_icon="✈️",
         layout="wide"
     )
     st.title("✈️ AI Travel Planner")
-    st.markdown("### Plan your perfect trip with AI assistance!")
-    # Add training button in sidebar only
     with st.sidebar:
         st.header("Model Management")
         if st.button("Retrain Model"):
-            with st.spinner("Training new model... This will take a while..."):
                 model, tokenizer = train_model()
-                if model is not None:
                     st.session_state['model'] = model
                     st.session_state['tokenizer'] = tokenizer
-                    st.success("Model training completed!")
-        # Add model information
-        st.markdown("### Model Information")
-        if 'model' in st.session_state:
-            st.success("✓ Model loaded")
-            st.info("""
-            This model was trained on travel plans for:
-            - Destinations from HuggingFace dataset
-            - Flexible days duration
-            - Multiple budget levels
-            - Various interest combinations
-            """)
-        # Load or train model
-        if 'model' not in st.session_state:
-            with st.spinner("Loading AI model... Please wait..."):
-                model, tokenizer = train_model()  # Changed from load_or_train_model
-                if model is None or tokenizer is None:
-                    st.error("Failed to load/train the AI model. Please try again.")
-                    return
-                st.session_state.model = model
-                st.session_state.tokenizer = tokenizer
-    # Create two columns for input form
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        # Input form in a card-like container
-        with st.container():
-            st.markdown("### 🎯 Plan Your Trip")
-            # Destination and Duration row
-            dest_col, days_col = st.columns(2)
-            with dest_col:
-                destination = st.text_input(
-                    "🌍 Destination",
-                    placeholder="e.g., Paris, Tokyo, New York...",
-                    help="Enter the city you want to visit"
-                )
-            with days_col:
-                days = st.slider(
-                    "📅 Number of days",
-                    min_value=1,
-                    max_value=14,
-                    value=3,
-                    help="Select the duration of your trip"
-                )
-            # Budget and Interests row
-            budget_col, interests_col = st.columns(2)
-            with budget_col:
-                budget = st.selectbox(
-                    "💰 Budget Level",
-                    ["Budget", "Moderate", "Luxury"],
-                    help="Select your preferred budget level"
-                )
-            with interests_col:
-                interests = st.multiselect(
-                    "🎯 Interests",
-                    ["Culture", "History", "Food", "Nature", "Shopping",
-                    "Adventure", "Relaxation", "Art", "Museums"],
-                    ["Culture", "Food"],
-                    help="Select up to three interests to personalize your plan"
                 )
-    with col2:
-        # Tips and information
-        st.markdown("### 💡 Travel Tips")
-        st.info("""
-        - Choose up to 3 interests for best results
-        - Consider your travel season
-        - Budget levels affect activity suggestions
-        - Plans are customizable after generation
-        """)
-    # Generate button centered
-    col1, col2, col3 = st.columns([1, 2, 1])
-    with col2:
-        generate_button = st.button(
-            "🎨 Generate Travel Plan",
-            type="primary",
-            use_container_width=True
-        )
-    if generate_button:
-        if not destination:
-            st.error("Please enter a destination!")
-            return
-        if not interests:
-            st.error("Please select at least one interest!")
-            return
-        if len(interests) > 3:
-            st.warning("For best results, please select up to 3 interests.")
-        with st.spinner("🤖 Creating your personalized travel plan..."):
-            travel_plan = generate_travel_plan(
-                destination,
-                days,
-                interests,
-                budget,
-                st.session_state.model,
-                st.session_state.tokenizer
-            )
-            # Create an expander for the success message with trip overview
-            with st.expander("✨ Your travel plan is ready! Click to see trip overview", expanded=True):
-                col1, col2, col3 = st.columns(3)
-                with col1:
-                    st.metric("Destination", destination)
-                with col2:
-                    if days == 1:
-                        st.metric("Duration", f"{days} day")
-                    else:
-                        st.metric("Duration", f"{days} days")
-                with col3:
-                    st.metric("Budget", budget)
-                st.write("**Selected Interests:**", ", ".join(interests))
-            # Display the plan in tabs with improved styling
-            plan_tab, summary_tab = st.tabs(["📋 Detailed Itinerary", "ℹ️ Trip Summary"])
-            with plan_tab:
-                # Add a container for better spacing
-                with st.container():
-                    # Add trip title
-                    st.markdown(f"## 🌍 {days}-Day Trip to {destination}")
-                    st.markdown("---")
-                    # Display the formatted plan
-                    st.markdown(travel_plan)
-                    # Add export options in a nice container
-                    with st.container():
-                        st.markdown("---")
-                        col1, col2 = st.columns([1, 4])
-                        with col1:
-                            st.download_button(
-                                label="📥 Download Plan",
-                                data=travel_plan,
-                                file_name=f"travel_plan_{destination.lower().replace(' ', '_')}.md",
-                                mime="text/markdown",
-                                use_container_width=True
-                            )
-            with summary_tab:
-                # Create three columns for summary information with cards
-                with st.container():
-                    st.markdown("## Trip Overview")
-                    sum_col1, sum_col2, sum_col3 = st.columns(3)
-                    with sum_col1:
-                        with st.container():
-                            st.markdown("### 📍 Destination Details")
-                            st.markdown(f"**Location:** {destination}")
-                            if days == 1:
-                                st.markdown(f"**Duration:** {days} day")
-                            else:
-                                st.markdown(f"**Duration:** {days} days")
-                            st.markdown(f"**Budget Level:** {budget}")
-                    with sum_col2:
-                        with st.container():
-                            st.markdown("### 🎯 Trip Focus")
-                            st.markdown("**Selected Interests:**")
-                            for interest in interests:
-                                st.markdown(f"- {interest}")
-                    with sum_col3:
-                        with st.container():
-                            st.markdown("### ⚠️ Travel Tips")
-                            st.info(
-                                "• Verify opening hours\n"
-                                "• Check current prices\n"
-                                "• Confirm availability\n"
-                                "• Consider seasonal factors"
-                            )
 if __name__ == "__main__":
-    main()

 random.seed(42)
 # Environment setup
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
 class TravelDataset(Dataset):
     def __init__(self, data, tokenizer, max_length=512):
         self.tokenizer = tokenizer
         self.data = data
         self.max_length = max_length
         print(f"Dataset loaded with {len(data)} samples")
         print("Columns:", list(data.columns))
         return len(self.data)
     def __getitem__(self, idx):
         row = self.data.iloc[idx]
+        # Input: query
+        input_text = row['query']
+        # Target: reference_information
+        target_text = row['reference_information']
         # Tokenize inputs
         input_encodings = self.tokenizer(
             'attention_mask': input_encodings['attention_mask'].squeeze(),
             'labels': target_encodings['input_ids'].squeeze()
         }
 def load_dataset():
     """
     Load the travel planning dataset from CSV and validate its schema.

     Reads ``hf://datasets/osunlp/TravelPlanner/train.csv`` with pandas and
     checks that the columns this app trains on ('query' and
     'reference_information') are present.

     Returns:
         pandas.DataFrame: the loaded rows, containing at least the
         required columns.

     Raises:
         RuntimeError: if the CSV cannot be read or a required column is
             missing. The underlying error is chained as ``__cause__``.
     """
     required_columns = ['query', 'reference_information']
     try:
         data = pd.read_csv("hf://datasets/osunlp/TravelPlanner/train.csv")
         missing = [col for col in required_columns if col not in data.columns]
         if missing:
             raise ValueError(f"Missing required column: {', '.join(missing)}")
     except Exception as e:
         print(f"Error loading dataset: {e}")
         # Raise instead of calling sys.exit(1): SystemExit derives from
         # BaseException, so it bypasses callers' `except Exception` handlers
         # and would terminate the app instead of letting the caller report
         # the failure.
         raise RuntimeError(f"Error loading dataset: {e}") from e
     print(f"Dataset loaded successfully with {len(data)} rows.")
     return data
 def train_model():
     """
     Fine-tune ``t5-base`` on the travel dataset and save the result.

     Steps, in order: load the data with ``load_dataset()``, create the T5
     tokenizer and model, split the rows 80/20 in file order (no shuffling)
     into training and validation sets, run a ``Trainer``, then write the
     model and tokenizer to ``./trained_travel_planner``.

     Returns:
         tuple: ``(model, tokenizer)`` on success, or ``(None, None)`` when
         any ``Exception`` is raised along the way (the error is printed).
     """
     try:
         frame = load_dataset()

         print("Initializing T5 model and tokenizer...")
         tokenizer = T5Tokenizer.from_pretrained('t5-base', legacy=False)
         model = T5ForConditionalGeneration.from_pretrained('t5-base')

         # First 80% of rows train the model; the remainder is held out.
         cutoff = int(0.8 * len(frame))
         train_dataset = TravelDataset(frame[:cutoff], tokenizer)
         val_dataset = TravelDataset(frame[cutoff:], tokenizer)

         # One location serves as both the Trainer output directory and the
         # final save directory.
         save_dir = "./trained_travel_planner"

         trainer_settings = dict(
             output_dir=save_dir,
             num_train_epochs=3,
             per_device_train_batch_size=4,
             per_device_eval_batch_size=4,
             weight_decay=0.01,
             # Evaluate every 50 steps, checkpoint every 100, log every 10.
             evaluation_strategy="steps",
             eval_steps=50,
             save_steps=100,
             logging_dir="./logs",
             logging_steps=10,
             load_best_model_at_end=True,
         )
         training_args = TrainingArguments(**trainer_settings)

         data_collator = DataCollatorForSeq2Seq(
             tokenizer=tokenizer,
             model=model,
             padding=True,
         )

         trainer = Trainer(
             model=model,
             args=training_args,
             train_dataset=train_dataset,
             eval_dataset=val_dataset,
             data_collator=data_collator,
         )

         print("Training model...")
         trainer.train()

         # Persist the model first, then the tokenizer, to the same folder.
         for artifact in (model, tokenizer):
             artifact.save_pretrained(save_dir)

         print("Model training complete!")
         return model, tokenizer
     except Exception as err:
         print(f"Training error: {err}")
         return None, None
+def generate_travel_plan(query, model, tokenizer):
     """
+    Generate a travel plan using the trained model.
     """
     try:
         inputs = tokenizer(
+            query,
             return_tensors="pt",
             max_length=512,
             padding="max_length",
             truncation=True
         )
         if torch.cuda.is_available():
             inputs = {k: v.cuda() for k, v in inputs.items()}
             model = model.cuda()
         outputs = model.generate(
             **inputs,
             max_length=512,
             no_repeat_ngram_size=3,
             num_return_sequences=1
         )
+        return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
+        return f"Error generating travel plan: {e}"
 def main():
     st.set_page_config(
         page_icon="✈️",
         layout="wide"
     )
     st.title("✈️ AI Travel Planner")
+    # Sidebar to train model
     with st.sidebar:
         st.header("Model Management")
         if st.button("Retrain Model"):
+            with st.spinner("Training the model..."):
                 model, tokenizer = train_model()
+                if model:
                     st.session_state['model'] = model
                     st.session_state['tokenizer'] = tokenizer
+                    st.success("Model retrained successfully!")
+                else:
+                    st.error("Model retraining failed.")
+    # Load model if not already loaded
+    if 'model' not in st.session_state:
+        with st.spinner("Loading model..."):
+            model, tokenizer = train_model()
+            st.session_state['model'] = model
+            st.session_state['tokenizer'] = tokenizer
+    # Input query
+    st.subheader("Plan Your Trip")
+    query = st.text_area("Enter your trip query (e.g., 'Plan a 3-day trip to Paris focusing on culture and food')")
+    if st.button("Generate Plan"):
+        if not query:
+            st.error("Please enter a query.")
+        else:
+            with st.spinner("Generating your travel plan..."):
+                travel_plan = generate_travel_plan(
+                    query,
+                    st.session_state['model'],
+                    st.session_state['tokenizer']
                 )
+                st.subheader("Your Travel Plan")
+                st.write(travel_plan)
 if __name__ == "__main__":
+    main()