Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
"""yieldpredictionrandomforest.ipynb | |
Automatically generated by Colab. | |
Original file is located at | |
https://colab.research.google.com/drive/1-bKSlitmr01NPLLG_ZrTBoZ-3hCHjwM6 | |
""" | |
import pandas as pd | |
import pickle | |
import numpy as np | |
import requests | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.metrics import confusion_matrix | |
from sklearn.metrics import classification_report | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.ensemble import RandomForestRegressor | |
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error | |
# Read the combined crop dataset and normalise the 'District' column to
# title case in a single expression.
crop_data = pd.read_csv('combined_data.csv').assign(
    District=lambda frame: frame['District'].str.title()
)
def get_weather_data(district):
    """Fetch the current humidity and temperature for a district.

    Queries the OpenWeatherMap current-weather endpoint by place name.

    Args:
        district: Place name sent as the ``q`` query parameter.

    Returns:
        A ``(humidity, temperature)`` tuple, where humidity is the
        ``main.humidity`` field of the response and temperature is
        ``main.temp`` converted from Kelvin to Celsius. Returns
        ``(None, None)`` when the request raises, the status code is not
        200, or the payload lacks the expected fields.
    """
    import os  # local import: only needed for the optional key override

    # NOTE(security): the fallback key is committed to source control.
    # Prefer setting OPENWEATHER_API_KEY in the environment and rotating
    # the embedded key.
    api_key = os.environ.get(
        'OPENWEATHER_API_KEY', '2146dd5e46379bba1811371d760fbf18'
    )
    base_url = 'https://api.openweathermap.org/data/2.5/weather'

    try:
        # Let requests build the query string so names containing spaces or
        # reserved characters are URL-encoded, and bound the wait so a
        # stalled connection cannot hang the caller.
        response = requests.get(
            base_url,
            params={'q': district, 'appid': api_key},
            timeout=10,
        )
    except requests.RequestException:
        print("Failed to fetch weather data. Please try again later.")
        return None, None

    if response.status_code != 200:
        print("Failed to fetch weather data. Please try again later.")
        return None, None

    try:
        main_section = response.json()['main']
        humidity = main_section['humidity']
        temperature = main_section['temp'] - 273.15  # Kelvin -> Celsius
    except (KeyError, TypeError, ValueError):
        # Malformed or unexpected payload: report it like any other failure.
        print("Failed to fetch weather data. Please try again later.")
        return None, None

    return humidity, temperature
def calculate_average_rainfall(state):
    """Draw a rainfall figure (in cm) for ``state`` from its configured range.

    The value is a uniform random sample between the lower and upper bound
    registered for the state, so repeated calls generally differ.

    Args:
        state: State or union-territory name, matched exactly.

    Returns:
        A float within the state's range, or ``None`` (after printing a
        notice) when the state has no configured range.
    """
    # Rainfall bounds in cm, keyed by (low, high); each tuple lists the
    # states that share those bounds.
    states_by_bounds = {
        (5, 20): ('Rajasthan',),
        (30, 80): ('Jammu and Kashmir', 'Delhi', 'Punjab', 'Chandigarh'),
        (40, 100): ('Gujarat', 'Haryana'),
        (50, 120): ('Himachal Pradesh',),
        (70, 150): (
            'Madhya Pradesh', 'Uttar Pradesh', 'Telangana', 'Karnataka',
            'Tamil Nadu', 'Maharashtra',
        ),
        (80, 150): ('Andhra Pradesh',),
        (80, 180): ('Bihar',),
        (80, 200): ('Puducherry',),
        (100, 200): (
            'Chhattisgarh', 'Dadra and Nagar Haveli', 'Odisha',
            'Uttarakhand', 'Jharkhand', 'Daman and Diu',
        ),
        (100, 300): ('Manipur',),
        (150, 300): ('Andaman and Nicobar Islands',),
        (200, 400): (
            'Meghalaya', 'Kerala', 'Assam', 'Goa', 'Mizoram', 'West Bengal',
            'Nagaland', 'Tripura', 'Arunachal Pradesh', 'Sikkim',
        ),
    }
    bounds_for_state = {
        name: bounds
        for bounds, names in states_by_bounds.items()
        for name in names
    }

    bounds = bounds_for_state.get(state)
    if not bounds:
        print(f"Average rainfall data not available for {state}.")
        return None

    low, high = bounds
    return np.random.uniform(low, high)
def recommend_top_n_crops(state, district, soil_type, season, Area, n=3):
    """Recommend the ``n`` most probable crops and print a yield estimate for each.

    Uses these module-level objects: ``calculate_average_rainfall``,
    ``get_weather_data``, the ``*_encodings`` dictionaries,
    ``crop_encodings_inverse``, the classifier ``model`` and the regressor
    ``model_rf``.

    Args:
        state: State name; used for the rainfall lookup and the state encoding.
        district: District name; used for the weather lookup and the
            district encoding.
        soil_type: Soil type name, looked up in ``soil_type_encodings``.
        season: Season name, looked up in ``season_type_encodings``.
        Area: Value passed as the last feature of the yield model.
        n: Number of crops to recommend.

    Returns:
        A list of crop names ordered from most to least probable, or
        ``None`` when rainfall or weather data is unavailable.

    Raises:
        KeyError: If ``state``, ``district``, ``soil_type`` or ``season`` is
            missing from its encoding dictionary.
    """
    avg_rainfall = calculate_average_rainfall(state)
    if avg_rainfall is None:
        print("Failed to fetch average rainfall data. Please try again later.")
        return None

    humidity, temperature = get_weather_data(district)
    if humidity is None or temperature is None:
        print("Failed to fetch weather data. Please try again later.")
        return None

    # Placeholder N, P, K values. They are fixed-seed draws and are not
    # derived from soil_type. A private generator yields the same numbers as
    # seeding the global generator with 42, without resetting np.random for
    # the rest of the process.
    rng = np.random.RandomState(42)
    random_n = rng.uniform(0.1, 0.5)
    random_p = rng.uniform(0.01, 0.05)
    random_k = rng.uniform(0.2, 0.4)

    # Translate the categorical inputs into the integer codes used in training.
    state_encoded = state_encodings[state]
    district_encoded = district_encodings[district]
    soil_type_encoded = soil_type_encodings[soil_type]
    season_encoded = season_type_encodings[season]

    # The two trailing zeros fill the last two feature slots of the
    # classifier; the training code in this file puts 'Production' and
    # 'Yield' there.
    # NOTE(review): those values are unknown at recommendation time, so the
    # classifier sees inputs unlike its training data; confirm this is intended.
    classifier_features = np.array([[
        state_encoded, district_encoded, soil_type_encoded, avg_rainfall,
        temperature, humidity, random_n, random_p, random_k, season_encoded,
        0, 0,
    ]])
    predicted_probs = model.predict_proba(classifier_features)[0]

    # predict_proba columns follow model.classes_, which can skip labels that
    # were absent from the training split, so map column positions to class
    # labels before decoding them to crop names.
    top_n_indices = np.argsort(predicted_probs)[::-1][:n]
    top_n_crops = [
        crop_encodings_inverse[model.classes_[idx]] for idx in top_n_indices
    ]

    # Print each recommended crop followed by its predicted yield.
    for crop in top_n_crops:
        print(crop)
        yield_features = np.array([[
            state_encoded, district_encoded, soil_type_encoded,
            crop_type_encodings[crop], avg_rainfall, temperature, humidity,
            random_n, random_p, random_k, season_encoded, Area,
        ]])
        print(model_rf.predict(yield_features))

    # The original left this return commented out, so callers always got None.
    return top_n_crops
# Integer-encode each categorical column. A single LabelEncoder instance is
# refit for every column, so after this loop it holds only the 'Season' fit;
# the dictionaries below are what the rest of the script uses for lookups.
label_encoder = LabelEncoder()
for _source_col, _encoded_col in (
    ('State', 'State_Encoded'),
    ('District', 'District_Encoded'),
    ('Soil Type', 'Soil_Type_Encoded'),
    ('Crop', 'Crop_Type_Encoded'),
    ('Season', 'Season_Encoded'),
):
    crop_data[_encoded_col] = label_encoder.fit_transform(crop_data[_source_col])


def _column_lookup(key_col, value_col):
    """Build a dict mapping each value of ``key_col`` to its ``value_col`` entry."""
    return dict(zip(crop_data[key_col], crop_data[value_col]))


# Name -> code lookups for every encoded column.
state_encodings = _column_lookup('State', 'State_Encoded')
district_encodings = _column_lookup('District', 'District_Encoded')
soil_type_encodings = _column_lookup('Soil Type', 'Soil_Type_Encoded')
season_type_encodings = _column_lookup('Season', 'Season_Encoded')
crop_type_encodings = _column_lookup('Crop', 'Crop_Type_Encoded')
# Code -> name lookup used to decode classifier output back to crop names.
crop_encodings_inverse = _column_lookup('Crop_Type_Encoded', 'Crop')
# --- Crop classifier: features, split, fit, persist, evaluate ---------------
# NOTE(review): 'Production' and 'Yield' are part of the feature set here;
# confirm they are meant to be inputs to a crop recommender.
classifier_feature_columns = [
    'State_Encoded', 'District_Encoded', 'Soil_Type_Encoded',
    'Rainfall (cm)', 'Temperature (°C)', 'Humidity (%)',
    'N', 'P', 'K', 'Season_Encoded', 'Production', 'Yield',
]
X = crop_data[classifier_feature_columns]
y = crop_data['Crop_Type_Encoded']

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = RandomForestClassifier(
    n_estimators=200,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
)
model.fit(X_train, y_train)

# Persist the fitted classifier next to the script.
with open('crop_recommendation_model.pkl', 'wb') as file:
    pickle.dump(model, file)

# Report accuracy, the confusion matrix and per-class metrics on the test set.
test_accuracy = model.score(X_test, y_test)
print("Test Accuracy:", test_accuracy)

y_pred = model.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")
print(classification_report(y_test, y_pred))
# --- Yield regressor: features, split, fit, persist, evaluate ---------------
yield_feature_columns = [
    'State_Encoded', 'District_Encoded', 'Soil_Type_Encoded',
    'Crop_Type_Encoded', 'Rainfall (cm)', 'Temperature (°C)',
    'Humidity (%)', 'N', 'P', 'K', 'Season_Encoded', 'Area',
]
X1 = crop_data[yield_feature_columns]
y1 = crop_data['Yield']

# Same 80/20 split settings as the classifier.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, test_size=0.2, random_state=42
)

model_rf = RandomForestRegressor(
    n_estimators=200,
    max_depth=20,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
)
model_rf.fit(X1_train, y1_train)

# Persist the fitted regressor next to the script.
with open('yield_pred.pkl', 'wb') as file:
    pickle.dump(model_rf, file)

# Score the held-out rows.
y1_pred = model_rf.predict(X1_test)
mse = mean_squared_error(y1_test, y1_pred)
r2 = r2_score(y1_test, y1_pred)
# For a regressor, score() is the R^2 coefficient, so the value printed as
# "Test Accuracy" is the same metric as the R-squared line.
test_accuracy1 = model_rf.score(X1_test, y1_test)

for _label, _value in (
    ("Mean Squared Error:", mse),
    ("R-squared Score:", r2),
    ("Test Accuracy:", test_accuracy1),
):
    print(_label, _value)
# Example run: ask for three crop recommendations for one sample location
# and print whatever recommend_top_n_crops returns.
state, district = 'Rajasthan', 'Kota'
soil_type, season = 'Alluvial', 'Whole Year'
Area = 1

top_3_crops = recommend_top_n_crops(
    state, district, soil_type, season, Area, n=3
)
print('Top 3 Recommended Crops:', top_3_crops)