import os

import joblib
import numpy as np
import pandas as pd
import requests
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
|
| |
|
# OpenWeatherMap API key. The OPENWEATHER_API_KEY environment variable takes
# precedence; the literal is kept only as a backward-compatible fallback.
# SECURITY: the fallback key is committed in source control and should be
# rotated and then removed from this file.
API_KEY = os.environ.get('OPENWEATHER_API_KEY') or 'a4f54718b17aa482e0b0a9f2e6220fc0'

# Per-process memo of successful weather lookups, keyed by "city,state".
WEATHER_CACHE = {}

# Calendar month number (1-12) -> season name.
SEASON_MAP = {
    1: 'Winter', 2: 'Winter', 12: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}
|
| |
|
def fetch_weather(city, state, api_key=API_KEY, timeout=10):
    """Return current weather for a US city, memoised in ``WEATHER_CACHE``.

    The location is geocoded via the OpenWeatherMap geocoding endpoint and the
    resulting coordinates are passed to the current-weather endpoint
    (requested with ``units=metric``). Only successful lookups are cached,
    under the key ``"{city},{state}"``.

    Args:
        city: City name.
        state: State name or code; the geocoder is queried with
            ``"{city},{state},US"``.
        api_key: OpenWeatherMap API key.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict with the keys ``'temperature'``, ``'humidity'`` and
        ``'condition'``. If the geocoder returns no match, or any request or
        response-parsing step fails, a neutral default
        (20 / 50 / ``'Clear'``) is returned instead and nothing is cached.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]

    # Built fresh on every call so callers can never mutate a shared default.
    default_weather = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}

    try:
        # `params=` lets requests URL-encode the city/state text, so names
        # containing spaces, '&' or '#' cannot corrupt the query string.
        # HTTPS keeps the API key out of cleartext traffic.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={"q": f"{city},{state},US", "limit": 1, "appid": api_key},
            timeout=timeout,
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            return default_weather

        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']
        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={"lat": lat, "lon": lon, "appid": api_key, "units": "metric"},
            timeout=timeout,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main'],
        }
    except Exception as e:
        # Deliberate best-effort boundary: a network, HTTP or payload-shape
        # failure must not abort data preparation, so report and fall back.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return default_weather

    WEATHER_CACHE[key] = weather
    return weather
|
| |
|
def extract_season(month):
    """Map a calendar month number to its season name.

    Months missing from ``SEASON_MAP`` yield ``'Unknown'``.
    """
    if month in SEASON_MAP:
        return SEASON_MAP[month]
    return 'Unknown'
|
| |
|
def load_and_prepare_data(csv_path):
    """Load the orders CSV and add the derived columns used for training.

    Added columns:
        * ``order_month``, ``order_day_of_week``, ``season`` - derived from
          ``'Order Date'`` (parsed day-first).
        * ``discount`` - a synthetic target: ``Sales`` scaled to 0-20 by the
          column maximum, plus reproducible Gaussian noise (sd 2).
        * ``temperature``, ``humidity``, ``condition`` - from
          ``fetch_weather`` for each row's ``City`` / ``State``.

    Args:
        csv_path: Passed straight to ``pandas.read_csv``. The file must
            provide the columns ``'Order Date'``, ``'Sales'``, ``'City'``
            and ``'State'``.

    Returns:
        The loaded DataFrame with the columns above added.
    """
    df = pd.read_csv(csv_path)

    df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst=True)
    df['order_month'] = df['Order Date'].dt.month
    df['order_day_of_week'] = df['Order Date'].dt.dayofweek
    df['season'] = df['order_month'].apply(extract_season)

    # A private RandomState(42) yields the same stream as
    # np.random.seed(42) + np.random.normal, without reseeding the global
    # NumPy generator for the rest of the process.
    rng = np.random.RandomState(42)
    noise = rng.normal(0, 2, len(df))
    df['discount'] = (df['Sales'] / df['Sales'].max()) * 20 + noise

    # Look each distinct (City, State) up once, in first-seen row order.
    # This avoids row-wise DataFrame.apply (which misbehaves on an empty
    # frame) and stops failed lookups from being retried on every row.
    weather_by_location = {}
    weather_rows = []
    for city, state in zip(df['City'], df['State']):
        location = (city, state)
        if location not in weather_by_location:
            weather_by_location[location] = fetch_weather(city, state)
        weather_rows.append(weather_by_location[location])

    df['temperature'] = [w['temperature'] for w in weather_rows]
    df['humidity'] = [w['humidity'] for w in weather_rows]
    df['condition'] = [w['condition'] for w in weather_rows]
    return df
|
| |
|
def train_discount_model(df):
    """Fit and return a pipeline that predicts ``df['discount']``.

    Categorical columns are one-hot encoded (categories unseen at fit time
    are ignored at predict time), numeric columns are passed through
    unchanged, and the result feeds a 100-tree random forest with a fixed
    seed.

    Args:
        df: DataFrame containing every feature column listed below plus the
            ``'discount'`` target.

    Returns:
        The fitted ``Pipeline`` with steps ``'pre'`` and ``'reg'``.
    """
    categorical_cols = [
        'Category', 'Sub-Category', 'Product ID', 'City', 'State',
        'Segment', 'Ship Mode', 'season', 'condition',
    ]
    numeric_cols = [
        'Sales', 'order_month', 'order_day_of_week', 'temperature', 'humidity',
    ]
    # Column order of the training frame, kept explicit so the fitted
    # pipeline records the same input layout.
    feature_cols = [
        'Category', 'Sub-Category', 'Product ID', 'Sales',
        'City', 'State', 'Segment', 'Ship Mode',
        'order_month', 'order_day_of_week', 'season',
        'temperature', 'humidity', 'condition',
    ]

    encoder = OneHotEncoder(handle_unknown='ignore')
    column_prep = ColumnTransformer(transformers=[
        ('cat', encoder, categorical_cols),
        ('num', 'passthrough', numeric_cols),
    ])
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    pipeline = Pipeline(steps=[
        ('pre', column_prep),
        ('reg', forest),
    ])

    pipeline.fit(df[feature_cols], df['discount'])
    return pipeline
|
| |
|
if __name__ == "__main__":
    # Script entry point: build the training frame from train.csv, fit the
    # discount model, and persist it with joblib in the working directory.
    csv_path = 'train.csv'
    model_path = 'discount_model.joblib'

    print("Loading and preparing data...")
    df = load_and_prepare_data(csv_path)

    print("Training model...")
    model = train_discount_model(df)

    joblib.dump(model, model_path)
    print("Model trained and saved as discount_model.joblib.")