# Page residue from the hosting site's file viewer: Spaces status "Running"; file size 9,417 bytes; revision 9eada9c.
# -*- coding: utf-8 -*-
"""yieldpredictionrandomforest.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1-bKSlitmr01NPLLG_ZrTBoZ-3hCHjwM6
"""
import pandas as pd
import pickle
import numpy as np
import requests
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
# Load the crop dataset; the path is relative to the current working directory.
crop_data = pd.read_csv('combined_data.csv')
# Convert district names to title case so that the district lookup table
# built from this column is keyed by names such as 'Kota'.
crop_data['District'] = crop_data['District'].str.title()
def get_weather_data(district):
    """Fetch the current humidity and temperature for a district.

    Sends one GET request to the OpenWeatherMap ``/data/2.5/weather``
    endpoint, using ``district`` as the ``q`` (place name) query parameter.

    Args:
        district: Place name to look up.

    Returns:
        A ``(humidity, temperature)`` tuple, where ``humidity`` is the
        ``main.humidity`` field of the response and ``temperature`` is the
        ``main.temp`` field converted from Kelvin to degrees Celsius.
        ``(None, None)`` is returned (after printing a message) when the
        request cannot be completed, the API answers with a non-200 status,
        or the payload does not contain the expected fields.
    """
    import os  # local import: only needed for the optional key override

    # NOTE(review): a credential is hard-coded as the fallback below. Prefer
    # supplying it through OPENWEATHERMAP_API_KEY and rotating this value.
    api_key = os.environ.get(
        'OPENWEATHERMAP_API_KEY', '2146dd5e46379bba1811371d760fbf18'
    )
    base_url = 'http://api.openweathermap.org/data/2.5/weather'

    # Let requests build and percent-encode the query string so that names
    # containing spaces, '&' or '#' cannot corrupt the URL.
    query = {'q': district, 'appid': api_key}
    try:
        # A timeout prevents the caller from hanging on a stalled connection.
        response = requests.get(base_url, params=query, timeout=10)
    except requests.RequestException:
        print("Failed to fetch weather data. Please try again later.")
        return None, None

    if response.status_code != 200:
        print("Failed to fetch weather data. Please try again later.")
        return None, None

    try:
        main_section = response.json()['main']
        humidity = main_section['humidity']
        temperature = main_section['temp'] - 273.15  # Kelvin -> Celsius
    except (KeyError, TypeError, ValueError):
        # Malformed or unexpected payload: report it like any other failure.
        print("Failed to fetch weather data. Please try again later.")
        return None, None

    return humidity, temperature
# Rainfall bounds per state, in cm, stored as (lower, upper).
_STATE_RAINFALL_RANGES_CM = {
    'Rajasthan': (5, 20),
    'Manipur': (100, 300),
    'Madhya Pradesh': (70, 150),
    'Puducherry': (80, 200),
    'Bihar': (80, 180),
    'Andhra Pradesh': (80, 150),
    'Chhattisgarh': (100, 200),
    'Uttar Pradesh': (70, 150),
    'Andaman and Nicobar Islands': (150, 300),
    'Telangana': (70, 150),
    'Karnataka': (70, 150),
    'Gujarat': (40, 100),
    'Dadra and Nagar Haveli': (100, 200),
    'Meghalaya': (200, 400),
    'Tamil Nadu': (70, 150),
    'Maharashtra': (70, 150),
    'Kerala': (200, 400),
    'Assam': (200, 400),
    'Goa': (200, 400),
    'Mizoram': (200, 400),
    'West Bengal': (200, 400),
    'Jammu and Kashmir': (30, 80),
    'Himachal Pradesh': (50, 120),
    'Haryana': (40, 100),
    'Odisha': (100, 200),
    'Delhi': (30, 80),
    'Nagaland': (200, 400),
    'Tripura': (200, 400),
    'Punjab': (30, 80),
    'Uttarakhand': (100, 200),
    'Arunachal Pradesh': (200, 400),
    'Jharkhand': (100, 200),
    'Chandigarh': (30, 80),
    'Sikkim': (200, 400),
    'Daman and Diu': (100, 200),
}


def calculate_average_rainfall(state):
    """Return a rainfall figure (cm) for ``state`` drawn from its range.

    The figure is not a stored average: it is sampled with
    ``np.random.uniform`` between the state's lower and upper bound, so it
    varies from call to call unless NumPy's global seed is fixed.

    Args:
        state: State name; must match a key of the range table exactly.

    Returns:
        The sampled rainfall as a float, or ``None`` (after printing a
        message) when no range is configured for ``state``.
    """
    bounds = _STATE_RAINFALL_RANGES_CM.get(state)
    if not bounds:
        print(f"Average rainfall data not available for {state}.")
        return None

    lower, upper = bounds
    return np.random.uniform(lower, upper)
def recommend_top_n_crops(state, district, soil_type, season, Area, n=3):
    """Recommend the ``n`` most probable crops and print a yield estimate for each.

    Relies on module-level objects defined elsewhere in this file: the
    lookup tables ``state_encodings``, ``district_encodings``,
    ``soil_type_encodings``, ``season_type_encodings`` and
    ``crop_encodings_inverse``, the classifier ``model`` and the regressor
    ``model_rf``.

    Args:
        state: State name, used for the rainfall lookup and as a feature.
        district: District name, used for the weather lookup and as a feature.
        soil_type: Soil type label.
        season: Season label.
        Area: Area value passed as the last feature of the yield model.
        n: Number of crops to recommend.

    Returns:
        A list with the names of the ``n`` crops the classifier rates most
        probable, most probable first.  ``None`` is returned (after printing
        a message) when rainfall or weather data is unavailable or when one
        of the inputs has no entry in the lookup tables.

    Side effects:
        Prints each recommended crop followed by the regressor's prediction.
    """
    avg_rainfall = calculate_average_rainfall(state)
    if avg_rainfall is None:
        print("Failed to fetch average rainfall data. Please try again later.")
        return None

    # Resolve the categorical inputs before the network call so an unknown
    # label is reported in the same way as the other failure modes instead of
    # surfacing as a bare KeyError after the request was already made.
    try:
        state_encoded = state_encodings[state]
        district_encoded = district_encodings[district]
        soil_type_encoded = soil_type_encodings[soil_type]
        season_encoded = season_type_encodings[season]
    except KeyError as missing:
        print(f"Unknown input value {missing}; it has no entry in the lookup tables.")
        return None

    humidity, temperature = get_weather_data(district)
    if humidity is None or temperature is None:
        print("Failed to fetch weather data. Please try again later.")
        return None

    # Placeholder N, P and K values.  A private generator seeded with 42
    # yields the same three numbers as seeding the global generator would,
    # without resetting NumPy's global random state for the rest of the
    # program (which would make later rainfall draws repeat).
    rng = np.random.RandomState(42)
    random_n = rng.uniform(0.1, 0.5)
    random_p = rng.uniform(0.01, 0.05)
    random_k = rng.uniform(0.2, 0.4)

    # The classifier takes 12 features; the last two slots are fixed at 0.
    crop_features = np.array([[
        state_encoded, district_encoded, soil_type_encoded, avg_rainfall,
        temperature, humidity, random_n, random_p, random_k, season_encoded,
        0, 0,
    ]])
    predicted_probs = model.predict_proba(crop_features)[0]

    # predict_proba columns follow model.classes_, so translate the column
    # positions to class labels before looking up crop names; the two only
    # coincide when every label was present in the training split.
    top_n_indices = np.argsort(predicted_probs)[::-1][:n]
    top_n_labels = model.classes_[top_n_indices]

    top_n_crops = []
    for crop_encoded in top_n_labels:
        crop = crop_encodings_inverse[crop_encoded]
        print(crop)
        yield_features = np.array([[
            state_encoded, district_encoded, soil_type_encoded, crop_encoded,
            avg_rainfall, temperature, humidity, random_n, random_p, random_k,
            season_encoded, Area,
        ]])
        print(model_rf.predict(yield_features))
        top_n_crops.append(crop)

    return top_n_crops
# Encode every categorical column as integers.  One LabelEncoder instance is
# re-fitted for each column in turn, so after this loop it holds the 'Season'
# fit only; the per-column mappings are captured in the dictionaries below.
label_encoder = LabelEncoder()
for source_column, encoded_column in (
    ('State', 'State_Encoded'),
    ('District', 'District_Encoded'),
    ('Soil Type', 'Soil_Type_Encoded'),
    ('Crop', 'Crop_Type_Encoded'),
    ('Season', 'Season_Encoded'),
):
    crop_data[encoded_column] = label_encoder.fit_transform(crop_data[source_column])

# Create dictionaries to map original names to encoded labels
state_encodings = dict(zip(crop_data['State'], crop_data['State_Encoded']))
district_encodings = dict(zip(crop_data['District'], crop_data['District_Encoded']))
soil_type_encodings = dict(zip(crop_data['Soil Type'], crop_data['Soil_Type_Encoded']))
season_type_encodings = dict(zip(crop_data['Season'], crop_data['Season_Encoded']))
crop_type_encodings = dict(zip(crop_data['Crop'], crop_data['Crop_Type_Encoded']))
# Reverse mapping: encoded label -> crop name.
crop_encodings_inverse = dict(zip(crop_data['Crop_Type_Encoded'], crop_data['Crop']))

# ---------------------------------------------------------------------------
# Crop recommendation model (classifier)
# ---------------------------------------------------------------------------
# NOTE(review): 'Production' and 'Yield' are used as features here, yet
# recommend_top_n_crops passes 0 for both at prediction time.  Consider
# dropping them from the feature set (and from the inference vector) together.
X = crop_data[['State_Encoded', 'District_Encoded', 'Soil_Type_Encoded', 'Rainfall (cm)', 'Temperature (°C)', 'Humidity (%)', 'N', 'P', 'K', 'Season_Encoded', 'Production', 'Yield']]
y = crop_data['Crop_Type_Encoded']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest model with fixed hyperparameters
model = RandomForestClassifier(n_estimators=200, max_depth=20, min_samples_split=5, min_samples_leaf=2, random_state=42)
model.fit(X_train, y_train)

# Persist the fitted classifier.
with open('crop_recommendation_model.pkl', 'wb') as file:
    pickle.dump(model, file)

# Test the model
test_accuracy = model.score(X_test, y_test)
print("Test Accuracy:", test_accuracy)

# Evaluate the model with additional measures
y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)
print(classification_report(y_test, y_pred))

# ---------------------------------------------------------------------------
# Yield prediction model (regressor)
# ---------------------------------------------------------------------------
X1 = crop_data[['State_Encoded', 'District_Encoded', 'Soil_Type_Encoded', 'Crop_Type_Encoded', 'Rainfall (cm)', 'Temperature (°C)', 'Humidity (%)', 'N', 'P', 'K', 'Season_Encoded', 'Area']]
y1 = crop_data['Yield']

# Split data into training and testing sets
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size=0.2, random_state=42)

# Train RandomForestRegressor model
model_rf = RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=5, min_samples_leaf=2, random_state=42)
model_rf.fit(X1_train, y1_train)

# Persist the fitted regressor.
with open('yield_pred.pkl', 'wb') as file:
    pickle.dump(model_rf, file)

# Predict yield for the testing data
y1_pred = model_rf.predict(X1_test)

# Evaluate the model
mse = mean_squared_error(y1_test, y1_pred)
r2 = r2_score(y1_test, y1_pred)
# For a regressor, score() is the coefficient of determination (R^2), not an
# accuracy, so it is labelled accordingly below.
test_accuracy1 = model_rf.score(X1_test, y1_test)
print("Mean Squared Error:", mse)
print("R-squared Score:", r2)
print("Test R-squared (model.score):", test_accuracy1)

# Example usage:
state = 'Rajasthan'
district = 'Kota'
soil_type = 'Alluvial'
season = 'Whole Year'
Area = 1
top_3_crops = recommend_top_n_crops(state, district, soil_type, season, Area, n=3)
print('Top 3 Recommended Crops:', top_3_crops)