# -*- coding: utf-8 -*-
"""yieldpredictionrandomforest.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1-bKSlitmr01NPLLG_ZrTBoZ-3hCHjwM6
"""

import pandas as pd
import pickle
import numpy as np
import requests
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error

# Read the crop dataset and rewrite the 'District' column in title case,
# in one chained expression.
crop_data = pd.read_csv('combined_data.csv').assign(
    District=lambda frame: frame['District'].str.title()
)

# Function to fetch humidity and temperature from a weather API
def get_weather_data(district):
    """Fetch the current humidity and temperature for a place from OpenWeatherMap.

    Args:
        district: Place name sent to the API as the ``q`` query parameter.

    Returns:
        A ``(humidity, temperature)`` tuple read from the ``main`` section of
        the JSON response, with the temperature converted from Kelvin to
        Celsius. ``(None, None)`` is returned, after printing a message, when
        the request cannot be completed, the HTTP status is not 200, or the
        payload does not contain the expected fields.
    """
    import os  # local import so this function carries its own dependency

    failure_message = "Failed to fetch weather data. Please try again later."

    # NOTE(review): a real API key is committed in this file. The environment
    # variable takes precedence; the literal is kept only so existing setups
    # keep working and should be rotated and removed.
    api_key = os.environ.get('OPENWEATHERMAP_API_KEY', '2146dd5e46379bba1811371d760fbf18')
    # HTTPS so the key is not sent in clear text.
    base_url = 'https://api.openweathermap.org/data/2.5/weather'

    try:
        # ``params`` URL-encodes the place name (spaces, '&', non-ASCII), and
        # the timeout stops a stalled connection from blocking the caller.
        response = requests.get(
            base_url,
            params={'q': district, 'appid': api_key},
            timeout=10,
        )
    except requests.RequestException:
        print(failure_message)
        return None, None

    if response.status_code != 200:
        print(failure_message)
        return None, None

    try:
        main_section = response.json()['main']
        humidity = main_section['humidity']
        temperature = main_section['temp'] - 273.15  # Convert from Kelvin to Celsius
    except (ValueError, KeyError, TypeError):
        # Body was not JSON or did not have the expected shape.
        print(failure_message)
        return None, None

    return humidity, temperature

# Function to produce a rainfall figure for a given state
def calculate_average_rainfall(state):
    """Return a rainfall value (in cm) for *state*, or ``None`` if it is unknown.

    Despite the name, the value is not a fixed average: it is a single draw
    from ``np.random.uniform`` between the lower and upper bound stored for
    the state, so repeated calls generally return different numbers. When
    the state has no entry, a message is printed and ``None`` is returned.
    """
    # (low, high) rainfall bounds in cm, each followed by the states sharing them.
    states_by_range = (
        ((5, 20), ('Rajasthan',)),
        ((30, 80), ('Jammu and Kashmir', 'Delhi', 'Punjab', 'Chandigarh')),
        ((40, 100), ('Gujarat', 'Haryana')),
        ((50, 120), ('Himachal Pradesh',)),
        ((70, 150), (
            'Madhya Pradesh', 'Uttar Pradesh', 'Telangana', 'Karnataka',
            'Tamil Nadu', 'Maharashtra',
        )),
        ((80, 150), ('Andhra Pradesh',)),
        ((80, 180), ('Bihar',)),
        ((80, 200), ('Puducherry',)),
        ((100, 200), (
            'Chhattisgarh', 'Dadra and Nagar Haveli', 'Odisha', 'Uttarakhand',
            'Jharkhand', 'Daman and Diu',
        )),
        ((100, 300), ('Manipur',)),
        ((150, 300), ('Andaman and Nicobar Islands',)),
        ((200, 400), (
            'Meghalaya', 'Kerala', 'Assam', 'Goa', 'Mizoram', 'West Bengal',
            'Nagaland', 'Tripura', 'Arunachal Pradesh', 'Sikkim',
        )),
    )
    bounds_by_state = {
        name: bounds
        for bounds, names in states_by_range
        for name in names
    }

    bounds = bounds_by_state.get(state)
    if bounds is None:
        print(f"Average rainfall data not available for {state}.")
        return None

    low, high = bounds
    return np.random.uniform(low, high)

def recommend_top_n_crops(state, district, soil_type, season, Area, n=3):
    """Recommend the *n* most probable crops and print a yield prediction for each.

    The function relies on module-level objects defined elsewhere in this
    file: the lookup dictionaries ``state_encodings``, ``district_encodings``,
    ``soil_type_encodings``, ``season_type_encodings``,
    ``crop_type_encodings`` and ``crop_encodings_inverse``, the classifier
    ``model`` and the regressor ``model_rf``.

    Args:
        state: State name; used for the rainfall lookup and the state encoding.
        district: District name; used for the weather request and the
            district encoding.
        soil_type: Soil type name as it appears in the training data.
        season: Season name as it appears in the training data.
        Area: Value placed in the last feature position for the yield model.
        n: Number of crops to recommend.

    Returns:
        A list with the names of the top *n* crops, most probable first.
        ``None`` is returned, after printing a message, when rainfall or
        weather data is unavailable or when one of the categorical inputs
        has no known encoding.
    """
    avg_rainfall = calculate_average_rainfall(state)
    if avg_rainfall is None:
        print("Failed to fetch average rainfall data. Please try again later.")
        return None

    humidity, temperature = get_weather_data(district)
    if humidity is None or temperature is None:
        print("Failed to fetch weather data. Please try again later.")
        return None

    # Map input strings to their encoded values; an unseen value is reported
    # the same way as the other failure paths instead of raising KeyError.
    try:
        state_encoded = state_encodings[state]
        district_encoded = district_encodings[district]
        soil_type_encoded = soil_type_encodings[soil_type]
        season_encoded = season_type_encodings[season]
    except KeyError as missing:
        print(f"No encoding available for {missing}; it is not present in the training data.")
        return None

    # Placeholder N, P and K values. A private generator seeded with 42 yields
    # the same three numbers as seeding the global generator did, without
    # resetting NumPy's global random state on every call.
    rng = np.random.RandomState(42)
    random_n = rng.uniform(0.1, 0.5)
    random_p = rng.uniform(0.01, 0.05)
    random_k = rng.uniform(0.2, 0.4)

    # Feature order follows the columns the classifier was trained on; the
    # last two positions (Production, Yield) are filled with 0.
    input_features = np.array([[
        state_encoded, district_encoded, soil_type_encoded,
        avg_rainfall, temperature, humidity,
        random_n, random_p, random_k,
        season_encoded, 0, 0,
    ]])
    predicted_probs = model.predict_proba(input_features)[0]

    # Columns of predict_proba are ordered like model.classes_, so translate
    # the column index into the encoded label before looking up the crop name.
    top_n_indices = np.argsort(predicted_probs)[::-1][:n]
    top_n_crops = [
        crop_encodings_inverse[model.classes_[idx]] for idx in top_n_indices
    ]

    # Print each recommended crop followed by its predicted yield.
    for crop in top_n_crops:
        print(crop)
        yield_features = np.array([[
            state_encoded, district_encoded, soil_type_encoded,
            crop_type_encodings[crop],
            avg_rainfall, temperature, humidity,
            random_n, random_p, random_k,
            season_encoded, Area,
        ]])
        print(model_rf.predict(yield_features))

    return top_n_crops

# Integer-encode each categorical column into a new '*_Encoded' column.
# The same encoder object is refitted for every column, so once the loop
# finishes it is fitted on the 'Season' column only.
label_encoder = LabelEncoder()
for raw_column, encoded_column in (
    ('State', 'State_Encoded'),
    ('District', 'District_Encoded'),
    ('Soil Type', 'Soil_Type_Encoded'),
    ('Crop', 'Crop_Type_Encoded'),
    ('Season', 'Season_Encoded'),
):
    crop_data[encoded_column] = label_encoder.fit_transform(crop_data[raw_column])


def _pair_columns(key_column, value_column):
    """Return a dict pairing each row's *key_column* value with its *value_column* value."""
    return dict(zip(crop_data[key_column], crop_data[value_column]))


# Lookup tables from original names to encoded labels (and back, for crops)
state_encodings = _pair_columns('State', 'State_Encoded')
district_encodings = _pair_columns('District', 'District_Encoded')
soil_type_encodings = _pair_columns('Soil Type', 'Soil_Type_Encoded')
season_type_encodings = _pair_columns('Season', 'Season_Encoded')
crop_type_encodings = _pair_columns('Crop', 'Crop_Type_Encoded')
crop_encodings_inverse = _pair_columns('Crop_Type_Encoded', 'Crop')

# Feature matrix and target (encoded crop label) for the crop classifier.
# NOTE(review): 'Production' and 'Yield' are used as features here, while
# recommend_top_n_crops fills those two positions with 0 at prediction
# time - confirm this is intended.
classifier_columns = [
    'State_Encoded', 'District_Encoded', 'Soil_Type_Encoded',
    'Rainfall (cm)', 'Temperature (°C)', 'Humidity (%)',
    'N', 'P', 'K',
    'Season_Encoded', 'Production', 'Yield',
]
X = crop_data[classifier_columns]
y = crop_data['Crop_Type_Encoded']

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the random forest classifier and pickle it to disk.
classifier_settings = dict(
    n_estimators=200,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
)
model = RandomForestClassifier(**classifier_settings).fit(X_train, y_train)
with open('crop_recommendation_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# Accuracy on the held-out rows.
test_accuracy = model.score(X_test, y_test)
print("Test Accuracy:", test_accuracy)

# Confusion matrix and per-class report on the same held-out rows.
y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")
print(classification_report(y_test, y_pred))


# Feature matrix and target ('Yield') for the yield regressor
X1 = crop_data[['State_Encoded', 'District_Encoded', 'Soil_Type_Encoded','Crop_Type_Encoded', 'Rainfall (cm)', 'Temperature (°C)', 'Humidity (%)', 'N', 'P', 'K', 'Season_Encoded','Area']]
y1 = crop_data['Yield']

# Split data into training and testing sets (20% held out, fixed seed)
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size=0.2, random_state=42)

# Train RandomForestRegressor model and pickle it to disk
model_rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=5, min_samples_leaf=2, random_state=42)
model_rf.fit(X1_train, y1_train)
with open('yield_pred.pkl', 'wb') as file:
    pickle.dump(model_rf, file)

# Predict yield for the testing data
y1_pred = model_rf.predict(X1_test)

# Evaluate the model
mse = mean_squared_error(y1_test, y1_pred)
r2 = r2_score(y1_test, y1_pred)
# A regressor's score() is the R-squared of its predictions, not an accuracy.
# Reuse the value computed above instead of running a second prediction pass;
# the variable name is kept for any code that still reads it.
test_accuracy1 = r2
print("Mean Squared Error:", mse)
print("R-squared Score:", r2)
print("Test R-squared (regressor score):", test_accuracy1)


# Example usage: run the recommender once for a hard-coded location and
# print whatever it returns.
state, district = 'Rajasthan', 'Kota'
soil_type, season = 'Alluvial', 'Whole Year'
Area = 1

top_3_crops = recommend_top_n_crops(state, district, soil_type, season, Area, n=3)
print('Top 3 Recommended Crops:', top_3_crops)