import streamlit as st import pandas as pd import numpy as np import joblib import matplotlib.pyplot as plt import os import openai from sklearn.preprocessing import LabelEncoder import requests from io import BytesIO import gdown # --- Set page configuration --- st.set_page_config( page_title="The Guide", page_icon="🚗", layout="wide", initial_sidebar_state="expanded" ) # --- Custom CSS for better styling --- st.markdown(""" """, unsafe_allow_html=True) # --- Cache functions --- def create_brand_categories(): return { 'luxury_brands': { 'rolls-royce': (300000, 600000), 'bentley': (200000, 500000), 'lamborghini': (250000, 550000), 'ferrari': (250000, 600000), 'mclaren': (200000, 500000), 'aston-martin': (150000, 400000), 'maserati': (100000, 300000) }, 'premium_brands': { 'porsche': (60000, 150000), 'bmw': (40000, 90000), 'mercedes-benz': (45000, 95000), 'audi': (35000, 85000), 'lexus': (40000, 80000), 'jaguar': (45000, 90000), 'land-rover': (40000, 90000), 'volvo': (35000, 75000), 'infiniti': (35000, 70000), 'cadillac': (40000, 85000), 'tesla': (40000, 100000) }, 'mid_tier_brands': { 'acura': (30000, 50000), 'lincoln': (35000, 65000), 'buick': (25000, 45000), 'chrysler': (25000, 45000), 'alfa-romeo': (35000, 60000), 'genesis': (35000, 60000) }, 'standard_brands': { 'toyota': (20000, 35000), 'honda': (20000, 35000), 'volkswagen': (20000, 35000), 'mazda': (20000, 32000), 'subaru': (22000, 35000), 'hyundai': (18000, 32000), 'kia': (17000, 30000), 'ford': (20000, 40000), 'chevrolet': (20000, 38000), 'gmc': (25000, 45000), 'jeep': (25000, 45000), 'dodge': (22000, 40000), 'ram': (25000, 45000), 'nissan': (18000, 32000) }, 'economy_brands': { 'mitsubishi': (15000, 25000), 'suzuki': (12000, 22000), 'fiat': (15000, 25000), 'mini': (20000, 35000), 'smart': (15000, 25000) }, 'discontinued_brands': { 'pontiac': (5000, 15000), 'saturn': (4000, 12000), 'mercury': (4000, 12000), 'oldsmobile': (3000, 10000), 'plymouth': (3000, 10000), 'saab': (5000, 15000) } } @st.cache_resource def download_file_from_google_drive(file_id): """Downloads a file from Google Drive using gdown.""" url = f"https://drive.google.com/uc?id={file_id}" try: with st.spinner('Downloading from Google Drive...'): output = f"temp_{file_id}.pkl" gdown.download(url, output, quiet=False) with open(output, 'rb') as f: content = f.read() # Clean up the temporary file os.remove(output) return content except Exception as e: st.error(f"Error downloading from Google Drive: {str(e)}") raise e @st.cache_data def load_datasets(): """Load the dataset from Google Drive.""" dataset_file_id = "1emG-BQ3-x4xsMAGMEznkh1ACdlAj5Dn1" try: with st.spinner('Loading dataset...'): content = download_file_from_google_drive(dataset_file_id) # Use BytesIO to read the CSV content original_data = pd.read_csv(BytesIO(content), low_memory=False) # Ensure column names match the model's expectations original_data.columns = original_data.columns.str.strip().str.capitalize() return original_data except Exception as e: st.error(f"Error loading dataset: {str(e)}") raise e @st.cache_resource def load_model_and_encodings(): """Load model from Google Drive and create encodings.""" model_file_id = "1wKixkdW2pVKEpJW-N1QIyKUr2nYirU7I" try: # Show loading message with st.spinner('Loading model...'): model_content = download_file_from_google_drive(model_file_id) model = joblib.load(BytesIO(model_content)) # Load data for encodings original_data = load_datasets() # Create fresh encoders from data label_encoders = {} categorical_features = ['Make', 'model', 'condition', 'fuel', 'title_status', 'transmission', 'drive', 'size', 'type', 'paint_color'] for feature in categorical_features: if feature in original_data.columns: le = LabelEncoder() unique_values = original_data[feature].fillna('unknown').str.strip().unique() le.fit(unique_values) label_encoders[feature.lower()] = le return model, label_encoders except Exception as e: st.error(f"Error loading model: {str(e)}") raise e # --- Load data and models --- try: original_data = load_datasets() model, label_encoders = load_model_and_encodings() # Using the new function except Exception as e: st.error(f"Error loading data or models: {str(e)}") st.stop() # --- Define categorical and numeric features --- # From model.py # --- Define features --- numeric_features = ['year', 'odometer', 'age', 'age_squared', 'mileage_per_year'] # Update the categorical features list to use lowercase categorical_features = ['make', 'model', 'condition', 'fuel', 'title_status', 'transmission', 'drive', 'size', 'type', 'paint_color'] required_features = numeric_features + categorical_features # --- Feature engineering functions --- def create_features(df): df = df.copy() current_year = 2024 df['age'] = current_year - df['year'] df['age_squared'] = df['age'] ** 2 df['mileage_per_year'] = np.clip(df['odometer'] / (df['age'] + 1), 0, 200000) return df def prepare_input(input_dict, label_encoders): # Convert None values to 'unknown' for safe handling input_dict = {k: v if v is not None else 'unknown' for k, v in input_dict.items()} # Convert input dictionary to DataFrame input_df = pd.DataFrame([input_dict]) # Ensure columns match the model's expected casing feature_name_mapping = { "make": "Make", # Match casing for 'Make' "model": "Model", # Match casing for 'Model' "condition": "Condition", "fuel": "Fuel", "title_status": "Title_status", "transmission": "Transmission", "drive": "Drive", "size": "Size", "type": "Type", "paint_color": "Paint_color", "year": "Year", "odometer": "Odometer", "age": "Age", "age_squared": "Age_squared", "mileage_per_year": "Mileage_per_year" } input_df.rename(columns=feature_name_mapping, inplace=True) # Numeric feature conversions input_df["Year"] = pd.to_numeric(input_df.get("Year", 0), errors="coerce") input_df["Odometer"] = pd.to_numeric(input_df.get("Odometer", 0), errors="coerce") # Feature engineering current_year = 2024 input_df["Age"] = current_year - input_df["Year"] input_df["Age_squared"] = input_df["Age"] ** 2 input_df["Mileage_per_year"] = input_df["Odometer"] / (input_df["Age"] + 1) input_df["Mileage_per_year"] = input_df["Mileage_per_year"].clip(0, 200000) # Encode categorical features for feature, encoded_feature in feature_name_mapping.items(): if feature in label_encoders: input_df[encoded_feature] = input_df[encoded_feature].fillna("unknown").astype(str).str.strip() try: input_df[encoded_feature] = label_encoders[feature].transform(input_df[encoded_feature]) except ValueError: input_df[encoded_feature] = 0 # Assign default for unseen values # Ensure all required features are present for feature in model.feature_names_in_: if feature not in input_df: input_df[feature] = 0 # Default value for missing features # Reorder columns input_df = input_df[model.feature_names_in_] return input_df # --- Styling functions --- st.markdown(""" """, unsafe_allow_html=True) def style_metric_container(label, value): st.markdown(f"""
{label}
{value}
Ask me anything about cars! For example: 'What's a good car under $30,000 with low mileage?'
A cutting-edge data science project leveraging machine learning to detect which car would be best for you.
""", unsafe_allow_html=True) inputs, predict_button = create_prediction_interface() # Prepare base inputs base_inputs = { "year": inputs.get("year", 2022), "make": inputs.get("make", "toyota").lower(), "model": inputs.get("model", "camry"), "odometer": inputs.get("odometer", 20000), "condition": inputs.get("condition", "good"), "fuel": inputs.get("fuel", "gas"), "title_status": inputs.get("title_status", "clean"), "transmission": inputs.get("transmission", "automatic"), "drive": inputs.get("drive", "fwd"), "size": inputs.get("size", "mid-size"), "paint_color": inputs.get("paint_color", "black"), "type": inputs.get("type", "sedan") } if base_inputs["condition"] == "new": base_inputs["odometer"] = 0 if predict_button: st.write(f"Analyzing {base_inputs['year']} {base_inputs['make'].title()} {base_inputs['model'].title()}...") prediction_results = predict_with_ranges(base_inputs, model, label_encoders) st.markdown(f""" ### Price Analysis - **Estimated Range**: ${prediction_results['min_price']:,.2f} - ${prediction_results['max_price']:,.2f} - **Model Prediction**: ${prediction_results['predicted_price']:,.2f} *Note: Range based on market data, condition, and mileage* """) # Generate and display the graph fig = create_market_trends_plot_with_model(model, base_inputs["make"], base_inputs, label_encoders) if fig: st.pyplot(fig) else: st.warning("No graph generated. Please check your data or selection.") with col2: create_assistant_section() if __name__ == "__main__": try: # Load data and model original_data = load_datasets() model, label_encoders = load_model_and_encodings() # Inspect model features inspect_model_features(model) # Call the main function main(model, label_encoders) except Exception as e: st.error(f"Error loading data or models: {str(e)}") st.stop()