diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,1688 +1,2004 @@ -from flask import Flask, render_template, request, jsonify -import numpy as np -import pandas as pd -import joblib -import os -from sklearn.svm import SVR -from sklearn.model_selection import train_test_split -from sklearn.metrics import mean_squared_error, r2_score -from sklearn.neighbors import KNeighborsClassifier -from sklearn.preprocessing import StandardScaler -from sklearn.ensemble import RandomForestClassifier -from sklearn.tree import DecisionTreeClassifier -from sklearn import svm -from sklearn.naive_bayes import GaussianNB # <--- Add this import -from sklearn.feature_extraction.text import CountVectorizer -from textblob import TextBlob -import traceback -from flask_cors import CORS -from werkzeug.utils import secure_filename # For secure file names -import io # To read CSV from memory -import re -from sklearn.cluster import KMeans, DBSCAN -from PIL import Image -import matplotlib.pyplot as plt -from joblib import load # ✅ This is the missing line -import traceback -import pickle -from sklearn.svm import SVC -from sklearn.datasets import make_classification -import plotly.graph_objs as go -import json -import requests -from PIL import Image - - -# from transformers import pipeline -from dotenv import load_dotenv -import os -from urllib.parse import urlparse -import tldextract -import string - - -# from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline - -# model_name = "microsoft/deberta-v3-small" - -# tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) -# model = AutoModelForSequenceClassification.from_pretrained(model_name) - -# bert_checker = pipeline("text-classification", model=model, tokenizer=tokenizer) - -# Load environment variables from .env -load_dotenv() -#spam url import relateted -import nltk, os - -# Tell NLTK to also check the local nltk_data folder -nltk.data.path.append(os.path.join(os.path.dirname(__file__), "nltk_data")) - -from nltk.corpus import words - -# Load the words corpus -valid_words = set(words.words()) -print("engineering" in valid_words) # ✅ Should be True -print("engineerigfnnxng" in valid_words) # ❌ Should be False -import wordninja # Function to split words into valid parts -import re -from urllib.parse import urlparse -from spellchecker import SpellChecker - -import wordninja -# end urlspam -import google.generativeai as genai - -# app.py -# import streamlit as st -# from load_file import load_file - -# st.title("Download HuggingFace Repo Files in Streamlit") - -# filename = st.text_input("Enter filename from repo:", "model.safetensors") - -# if st.button("Download"): -# try: -# local_path = load_file(filename) -# st.success(f"✅ File downloaded to: {local_path}") -# st.write("You can now use this file in your app.") -# except Exception as e: -# st.error(f"❌ Error: {str(e)}") - - -# Set API key (no need to assign OpenAI() to client like that) -# openai.api_key = os.getenv("OPENAI_API_KEY") - -# def ask_openai_scientific_validation(statement): -# prompt = f"""Assess the scientific accuracy of: "{statement}"\nRespond with ✅ (possible) or ❌ (impossible), and explain simply.""" - -# try: -# client = OpenAI() # This is correct placement -# response = client.chat.completions.create( -# model="gpt-3.5-turbo", -# messages=[ -# {"role": "system", "content": "You are a scientific fact-checker."}, -# {"role": "user", "content": prompt} -# ], -# temperature=0.7, -# max_tokens=150 -# ) - - -# return 
response.choices[0].message.content.strip() - -# except Exception as e: -# return f"⚠️ Could not verify:\n\n{str(e)}" - - - #huggung face code start -REPO_ID = "deedrop1140/nero-ml" -MODEL_DIR = "Models" - -def load_file(filename): - """Try to load model from local folder; if missing, download from Hugging Face Hub.""" - local_path = os.path.join(MODEL_DIR, filename) - - # 1️⃣ Check if file exists locally - if os.path.exists(local_path): - file_path = local_path - else: - # 2️⃣ Download from Hugging Face (Render case) - file_path = hf_hub_download(repo_id=REPO_ID, filename=filename) - - # 3️⃣ Load based on file extension - if filename.endswith((".pkl", ".joblib")): - return joblib.load(file_path) - elif filename.endswith(".npy"): - return np.load(file_path, allow_pickle=True) - elif filename.endswith((".pt", ".pth")): - return torch.load(file_path, map_location="cpu") - else: - return file_path - -# # ===================== -# # Replace your old model loads with this: -# # ===================== - -# # Models -# knn_model = load_file("Models/knn_model.pkl") -# lasso_model = load_file("Models/lasso_model.pkl") -# liar_model = load_file("Models/liar_model.joblib") -# linear_model = load_file("Models/linear_model.pkl") -# logistic_model = load_file("Models/logistic_model.pkl") -# nb_url_model = load_file("Models/nb_url_model.pkl") -# poly_model = load_file("Models/poly_model.pkl") -# rf_model = load_file("Models/rf_model.pkl") -# ridge_model = load_file("Models/ridge_model.pkl") -# supervised_model = load_file("Models/supervised_model.pkl") -# svr_model = load_file("Models/svr_model.pkl") -# voting_url_model = load_file("Models/voting_url_model.pkl") - -# # Vectorizers / Encoders / Scalers -# label_classes = load_file("Models/label_classes.npy") -# label_encoder = load_file("Models/label_encoder.pkl") -# lasso_scaler = load_file("Models/lasso_scaler.pkl") -# liar_vectorizer = load_file("Models/liar_vectorizer.joblib") -# nb_url_vectorizer = load_file("Models/nb_url_vectorizer.pkl") -# poly_transform = load_file("Models/poly_transform.pkl") -# ridge_scaler = load_file("Models/ridge_scaler.pkl") -# svr_scaler_X = load_file("Models/svr_scaler_X.pkl") -# svr_scaler_y = load_file("Models/svr_scaler_y.pkl") -# tfidf_vectorizer = load_file("Models/tfidf_vectorizer.pkl") -# url_vectorizer = load_file("Models/url_vectorizer.pkl") -# vectorizer_joblib = load_file("Models/vectorizer.joblib") -# vectorizer_pkl = load_file("Models/vectorizer.pkl") -# # huggung face code end - -MODEL_DIR = "Models" -DATA_DIR = "housedata" # Assuming your house data is here -UPLOAD_FOLDER = 'static/uploads' # NEW: Folder for temporary user uploads - -app = Flask(__name__) -app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER -CORS(app) - - - -genai.configure(api_key=os.getenv("GEMINI_API_KEY")) - -def ask_gemini(statement): - model = genai.GenerativeModel("gemini-2.0-flash-001") - response = model.generate_content(f"Verify this statement for truth: {statement}") - return response.text - -#rfc -# model = load("Models/liar_model.joblib") -# vectorizer = load("Models/liar_vectorizer.joblib") - -# Load BERT fact-checker pipeline (local model) -# bert_checker = pipeline("text-classification", model="microsoft/deberta-v3-small") - -#endrfc - -#svm - -# ==== SVM Setup ==== -X, y = make_classification(n_samples=100, n_features=2, n_redundant=0, - n_clusters_per_class=1, n_classes=2, random_state=42) -scaler = StandardScaler() -X = scaler.fit_transform(X) -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
random_state=42) - -# Train SVM -svm_model = SVC(kernel="linear") -svm_model.fit(X_train, y_train) - -#endsvm -#deision tree -GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") -GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" -#end deision tree - -# Ensure directories exist -os.makedirs(MODEL_DIR, exist_ok=True) -os.makedirs(DATA_DIR, exist_ok=True) -os.makedirs(UPLOAD_FOLDER, exist_ok=True) # NEW: Create upload folder - -def clean_text(text): - if pd.isnull(text): - return "" - text = text.lower() - text = re.sub(r"http\S+|www\S+|https\S+", '', text) - text = text.translate(str.maketrans('', '', string.punctuation)) - text = re.sub(r'\d+', '', text) - text = re.sub(r'\s+', ' ', text).strip() - return text - -# --- Helper functions for data generation (conceptual for demo) --- -def generate_linear_data(n_samples=100, noise=0.5): - X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) - y = 2 * X.squeeze() + 5 + noise * np.random.randn(n_samples) - return X, y - -def generate_non_linear_data(n_samples=100, noise=0.5): - X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) - y = np.sin(X.squeeze()) * 10 + noise * np.random.randn(n_samples) - return X, y - -def generate_noisy_data(n_samples=100, noise_factor=3.0): - X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) - y = 2 * X.squeeze() + 5 + noise_factor * np.random.randn(n_samples) # Increased noise - return X, y - -# Function to generate house price data (using your existing data structure for consistency) -def get_house_data(): - try: - df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv')) - # Using a subset of features for simplicity in demo - features = ['GrLivArea', 'OverallQual', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] - # Check if all required columns exist - if not all(col in df.columns for col in features + ['SalePrice']): - print("Warning: Missing one or more required columns in train.csv for house data.") - return None, None - X = df[features] - y = df['SalePrice'] - return X, y - except FileNotFoundError: - print(f"Error: train.csv not found in {DATA_DIR}. Please ensure your data is there.") - return None, None - except Exception as e: - print(f"Error loading house data: {e}") - return None, None - -# Dictionary to hold all loaded models -loaded_models = {} - -# Load logistic model and vectorizer for SMS -# vectorizer = joblib.load("Models/logvectorizer.pkl") -# model = joblib.load("Models/logistic_model.pkl") -# vectorizer = load_file("Models/logvectorizer.pkl") -# model = load_file("Models/logistic_model.pkl") - - -# # Load models once NB+DT+SVM is trained -# try: -# model = load_file("Models/logistic_model.pkl") -# # vectorizer = joblib.load("Models/logvectorizer.pkl") -# # model = joblib.load("Models/logistic_model.pkl") -# vectorizer = load_file("Models/vectorizer.pkl") -# print("✅ Model and vectorizer loaded into memory successfully!") -# except Exception as e: -# vectorizer = None -# model = None -# print(f"❌ Error: Could not load model or vectorizer. Please check your file paths. 
Error: {e}") -# #END NB+DT+SVM - -# === Naive Bayes URL Spam Classifier (NB_spam.html) === -# === Load Model & Vectorizer === - - - -# VT_API_KEY = os.getenv("VT_API_KEY") -# nb_model = load_file("Models/nb_url_model.pkl") -# vectorizer = load_file("Models/nb_url_vectorizer.pkl") - -# if nb_model is not None and vectorizer is not None: -# print("✅ Loaded model and vectorizer.") -# else: -# print("❌ Model or vectorizer not found.") - - - - - - -def load_all_models(): - """ - Loads all necessary models into the loaded_models dictionary when the app starts. - """ - global loaded_models - - # Load Supervised Model - # Load Supervised Model -try: - supervised_model_path = load_file("linear_model.pkl") - - # Debug: check what load_file actually returned - print("DEBUG -> supervised_model_path type:", type(supervised_model_path)) - - # If load_file returned a path (string), load with joblib - if isinstance(supervised_model_path, str): - loaded_models['supervised'] = joblib.load(supervised_model_path) - else: - # If load_file already returned the model object - loaded_models['supervised'] = supervised_model_path - - print("Supervised model loaded successfully") - -except FileNotFoundError: - print(f"Error: Supervised model file not found at {supervised_model_path}. " - "Please run train_model.py first.") - loaded_models['supervised'] = None # Mark as not loaded -except Exception as e: - print(f"Error loading supervised model: {e}") - loaded_models['supervised'] = None - - -# Load models when Flask app context is ready -with app.app_context(): - load_all_models() - -@app.route('/') -def frontpage(): - return render_template('frontpage.html') -@app.route('/home') -def home(): - return render_template('home.html') - -@app.route('/supervise') -def supervise(): - return render_template('supervise.html', active_page='supervise') - - -@app.route('/unsupervised') -def unsupervised(): - return render_template('unsupervised.html', active_page='unsupervised') - -# Semi-Supervised Learning page -@app.route('/semi-supervised') -def semi_supervised(): - return render_template('semi_supervised.html', active_page='semi_supervised') - -# Reinforcement Learning page -@app.route('/reinforcement') -def reinforcement(): - return render_template('reinforcement.html', active_page='reinforcement') - -# Ensemble Learning page -@app.route('/ensemble') -def ensemble(): - return render_template('ensemble.html', active_page='ensemble') - - -@app.route('/supervised', methods=['GET', 'POST']) -def supervised(): - prediction = None - hours_studied_input = None - - if loaded_models['supervised'] is None: - return "Error: Supervised model could not be loaded. Please check server logs.", 500 - - if request.method == 'POST': - try: - hours_studied_input = float(request.form['hours']) - input_data = np.array([[hours_studied_input]]) - - predicted_score = loaded_models['supervised'].predict(input_data)[0] - prediction = round(predicted_score, 2) - - except ValueError: - print("Invalid input for hours studied.") - prediction = "Error: Please enter a valid number." - except Exception as e: - print(f"An error occurred during prediction: {e}") - prediction = "Error during prediction." 
- - return render_template('supervised.html', prediction=prediction, hours_studied_input=hours_studied_input) - - -@app.route('/polynomial', methods=['GET', 'POST']) -def polynomial(): - if request.method == 'POST': - try: - hours = float(request.form['hours']) - - # model = joblib.load('Models/poly_model.pkl') - # poly = joblib.load('Models/poly_transform.pkl') - # model = load_file("Models/poly_model.pkl") - # poly= load_file("Models/poly_transform.pkl") - model = load_file("poly_model.pkl") - poly= load_file("poly_transform.pkl") - - transformed_input = poly.transform([[hours]]) - prediction = model.predict(transformed_input)[0] - - return render_template("poly.html", prediction=round(prediction, 2), hours=hours) - - except Exception as e: - print(f"Error: {e}") - return render_template("poly.html", error="Something went wrong.") - - return render_template("poly.html") - - -@app.route('/random_forest', methods=['GET', 'POST']) -def random_forest(): - if request.method == 'POST': - try: - hours = float(request.form['hours']) - model = load_file("rf_model.pkl") - # model = joblib.load('Models/rf_model.pkl') - prediction = model.predict([[hours]])[0] - - return render_template("rf.html", prediction=round(prediction, 2), hours=hours) - except Exception as e: - print(f"[ERROR] {e}") - return render_template("rf.html", error="Prediction failed. Check your input.") - return render_template("rf.html") - -@app.route('/prediction_flow') -def prediction_flow(): - return render_template('prediction_flow.html') - -@app.route("/lasso", methods=["GET", "POST"]) -def lasso(): - if request.method == "POST": - try: - inputs = [float(request.form.get(f)) for f in ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt']] - - # model = load_file("Models/lasso_model.pkl") - # scaler = load_file("Models/lasso_scaler.pkl") - # model = joblib.load("Models/lasso_model.pkl") - # scaler = joblib.load("Models/lasso_scaler.pkl") - model = load_file("lasso_model.pkl") - scaler = load_file("lasso_scaler.pkl") - - scaled_input = scaler.transform([inputs]) - - prediction = model.predict(scaled_input)[0] - return render_template("lasso.html", prediction=round(prediction, 2)) - - except Exception as e: - return render_template("lasso.html", error=str(e)) - - return render_template("lasso.html") - - -@app.route('/ridge', methods=['GET', 'POST']) -def ridge(): - prediction = None - error = None - - try: - # model = load_file("Models/ridge_model.pkl") - # scaler = load_file("Models/ridge_scaler.pkl") - # model = joblib.load(os.path.join(MODEL_DIR, 'ridge_model.pkl')) - # scaler = joblib.load(os.path.join(MODEL_DIR, 'ridge_scaler.pkl')) - - model = load_file("ridge_model.pkl") - scaler = load_file("ridge_scaler.pkl") - - - except Exception as e: - return f"❌ Error loading Ridge model: {e}", 500 - - if request.method == 'POST': - try: - features = ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] - input_data = [float(request.form[feature]) for feature in features] - input_scaled = scaler.transform([input_data]) - prediction = model.predict(input_scaled)[0] - except Exception as e: - error = str(e) - - return render_template('ridge.html', prediction=prediction, error=error) - -@app.route('/dtr', methods=['GET', 'POST']) -def dtr(): - if request.method == 'GET': - return render_template('dtr.html') - - if request.method == 'POST': - data = request.get_json() - data_points = data.get('dataPoints') if data else None - print("Received data:", data_points) - return jsonify({'message': 'Data 
received successfully!', 'receivedData': data_points}) - - -@app.route('/dtrg') -def drg(): - return render_template('desiciongame.html') - -# --- SVR Routes --- -@app.route('/svr') # This route is for the initial GET request to load the page -def svr_page(): - return render_template('svr.html') - - -@app.route('/run_svr_demo', methods=['POST']) -def run_svr_demo(): - try: - # Check if the request contains JSON (for predefined datasets) or FormData (for file uploads) - if request.is_json: - data = request.json - else: - # For FormData, data is accessed via request.form for fields, request.files for files - data = request.form - - dataset_type = data.get('dataset_type', 'linear') - kernel_type = data.get('kernel', 'rbf') - C_param = float(data.get('C', 1.0)) - gamma_param = float(data.get('gamma', 0.1)) - epsilon_param = float(data.get('epsilon', 0.1)) - - X, y = None, None - - if dataset_type == 'linear': - X, y = generate_linear_data() - elif dataset_type == 'non_linear': - X, y = generate_non_linear_data() - elif dataset_type == 'noisy': - X, y = generate_noisy_data() - elif dataset_type == 'house_data': - X_house, y_house = get_house_data() - if X_house is not None and not X_house.empty: - X = X_house[['GrLivArea']].values # Only GrLivArea for simple 1D plotting - y = y_house.values - else: - X, y = generate_linear_data() # Fallback if house data is missing/invalid - elif dataset_type == 'custom_csv': # NEW: Handle custom CSV upload - uploaded_file = request.files.get('file') - x_column_name = data.get('x_column_name') - y_column_name = data.get('y_column_name') - - if not uploaded_file or uploaded_file.filename == '': - return jsonify({'error': 'No file uploaded for custom CSV.'}), 400 - if not x_column_name or not y_column_name: - return jsonify({'error': 'X and Y column names are required for custom CSV.'}), 400 - - try: - # Read CSV into a pandas DataFrame from in-memory BytesIO object - df = pd.read_csv(io.BytesIO(uploaded_file.read())) - - if x_column_name not in df.columns or y_column_name not in df.columns: - missing_cols = [] - if x_column_name not in df.columns: missing_cols.append(x_column_name) - if y_column_name not in df.columns: missing_cols.append(y_column_name) - return jsonify({'error': f"Missing columns in uploaded CSV: {', '.join(missing_cols)}"}), 400 - - X = df[[x_column_name]].values # Ensure X is 2D for scikit-learn - y = df[y_column_name].values - except Exception as e: - return jsonify({'error': f"Error reading or processing custom CSV: {str(e)}"}), 400 - else: # Fallback for unknown dataset types - X, y = generate_linear_data() - - - if X is None or y is None or len(X) == 0: - return jsonify({'error': 'Failed to generate or load dataset.'}), 500 - - # Scale data - scaler_X = StandardScaler() - scaler_y = StandardScaler() - - X_scaled = scaler_X.fit_transform(X) - y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten() - - X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42) - - # Train SVR model - svr_model = SVR(kernel=kernel_type, C=C_param, gamma=gamma_param, epsilon=epsilon_param) - svr_model.fit(X_train, y_train) - - # Make predictions - y_pred_scaled = svr_model.predict(X_test) - - # Inverse transform predictions to original scale for metrics - y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten() - y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten() - - # Calculate metrics - mse = mean_squared_error(y_test_original, y_pred) - r2 = 
r2_score(y_test_original, y_pred) - support_vectors_count = len(svr_model.support_vectors_) - - # Prepare data for plotting - plot_X_original = scaler_X.inverse_transform(X_scaled) - plot_y_original = scaler_y.inverse_transform(y_scaled.reshape(-1, 1)).flatten() - - x_plot = np.linspace(plot_X_original.min(), plot_X_original.max(), 500).reshape(-1, 1) - x_plot_scaled = scaler_X.transform(x_plot) - y_plot_scaled = svr_model.predict(x_plot_scaled) - y_plot_original = scaler_y.inverse_transform(y_plot_scaled.reshape(-1, 1)).flatten() - - y_upper_scaled = y_plot_scaled + epsilon_param - y_lower_scaled = y_plot_scaled - epsilon_param - y_upper_original = scaler_y.inverse_transform(y_upper_scaled.reshape(-1, 1)).flatten() - y_lower_original = scaler_y.inverse_transform(y_lower_scaled.reshape(-1, 1)).flatten() - - plot_data = { - 'data': [ - { - 'x': plot_X_original.flatten().tolist(), - 'y': plot_y_original.tolist(), - 'mode': 'markers', - 'type': 'scatter', - 'name': 'Original Data' - }, - { - 'x': x_plot.flatten().tolist(), - 'y': y_plot_original.tolist(), - 'mode': 'lines', - 'type': 'scatter', - 'name': 'SVR Prediction', - 'line': {'color': 'red'} - }, - { - 'x': x_plot.flatten().tolist(), - 'y': y_upper_original.tolist(), - 'mode': 'lines', - 'type': 'scatter', - 'name': 'Epsilon Tube (Upper)', - 'line': {'dash': 'dash', 'color': 'green'}, - 'fill': 'tonexty', - 'fillcolor': 'rgba(0,128,0,0.1)' - }, - { - 'x': x_plot.flatten().tolist(), - 'y': y_lower_original.tolist(), - 'mode': 'lines', - 'type': 'scatter', - 'name': 'Epsilon Tube (Lower)', - 'line': {'dash': 'dash', 'color': 'green'} - } - ], - 'layout': { - 'title': f'SVR Regression (Kernel: {kernel_type.upper()})', - 'xaxis': {'title': 'Feature Value'}, - 'yaxis': {'title': 'Target Value'}, - 'hovermode': 'closest' - } - } - - return jsonify({ - 'mse': mse, - 'r2_score': r2, - 'support_vectors_count': support_vectors_count, - 'plot_data': plot_data - }) - - except Exception as e: - print(f"Error in SVR demo: {e}") - return jsonify({'error': str(e)}), 500 - - -def clean_text(text): - return text.lower().strip() - - - - # Gradient-desent route -@app.route('/gradient-descent') -def gradient_descent(): - return render_template('Gradient-Descen.html') - -# Gradient-boosting route -@app.route('/gradient-boosting') -def gradient_boosting(): - return render_template('Gradient-Boosting.html') - -# Gradient-xgboost route -@app.route('/xgboost-regression') -def xgboost_regression(): - return render_template('XGBoost-Regression.html') - -#Gradient-lightgbm route -@app.route('/lightgbm') -def lightgbm(): - return render_template('LightGBM-Regression.html') - -#nerual network route for calssifcation -@app.route('/neural-network-classification') -def neural_network_classification(): - return render_template('Neural-Networks-for-Classification.html') - -#hierarchical clustering route - -@app.route('/hierarchical-clustering') -def hierarchical_clustering(): - return render_template('Hierarchical-Clustering.html') - -#Gaussian-mixture-models route -@app.route('/gaussian-mixture-models') -def gaussian_mixture_models(): - return render_template('Gaussian-Mixture-Models.html') - -#Principal-Component-Analysis -@app.route('/pca') -def pca(): - return render_template('Principal-Component-Analysis.html') - -#t-sne -@app.route('/t-sne') -def tsne(): - return render_template('t-SNE.html') - -# liner-discriminant-analysis -@app.route('/lda') -def lda(): - return render_template('Linear-Discriminant-Analysis.html') - -# Independent-Component-Analysis 
-@app.route('/ica') -def ica(): - return render_template('Independent-Component-Analysis.html') - -#Apriori -@app.route('/apriori') -def apriori(): - return render_template('Apriori-Algorithm.html') - - -# Eclat Algorithm -@app.route('/eclat') -def eclat(): - return render_template('Eclat-Algorithm.html') - -#genrative models -@app.route('/generative-models') -def generative_models(): - return render_template('Generative-Models.html') - -#self training -@app.route('/self-training') -def self_training(): - return render_template('Self-Training.html') - - -# TRANSDUCTIVE SVM -@app.route('/transductive-svm') -def transductive_svm(): - return render_template('Transductive-SVM.html') - - -#Graph-Based Methods -@app.route('/graph-based-methods') -def graph_based_methods(): - return render_template('Graph-Based-Method.html') - -#Agent-Environment-State -@app.route('/agent-environment-state') -def agent_environment_state(): - return render_template('Agent-Environment-State.html') - -#Action and Policy -@app.route('/action-and-policy') -def action_and_policy(): - return render_template('Action-and-Policy.html') - -#Reward-ValueFunction -@app.route('/reward-valuefunction') -def reward_valuefunction(): - return render_template('Reward-ValueFunction.html') - -#Q-Learning -@app.route('/q-learning') -def q_learning(): - return render_template('Q-Learning.html') - -#Deep Reinforcement Learning -@app.route('/deep-reinforcement-learning') -def deep_reinforcement_learning(): - return render_template('Deep-Reinforcement-Learning.html') - - -#Bagging -@app.route('/bagging') -def bagging(): - return render_template('Bagging.html') - -#Boosting -@app.route('/boosting') -def boosting(): - return render_template('Boosting.html') - -# stacking -@app.route('/stacking') -def stacking(): - return render_template('Stacking.html') - -# voting -@app.route('/voting') -def voting(): - return render_template('Voting.html') - -import re - -# Load saved model and vectorizer -# model = joblib.load("Models/logistic_model.pkl") -# vectorizer = joblib.load("Models/logvectorizer.pkl") - - -# Text cleaning -def clean_text(text): - text = text.lower() - text = re.sub(r'\W', ' ', text) - text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text) - text = re.sub(r'\s+', ' ', text) - return text.strip() - -@app.route('/logistic', methods=['GET', 'POST']) -def logistic(): - prediction, confidence_percentage, cleaned, tokens, probability = None, None, None, None, None - - - # model = load_file("Models/logistic_model.pkl") - # vectorizer = load_file("Models/logvectorizer.pkl") - model = load_file("logistic_model.pkl") - vectorizer = load_file("logvectorizer.pkl") - - if request.method == "POST": - msg = request.form.get('message', '') - cleaned = clean_text(msg) - tokens = cleaned.split() - - - try: - vector = vectorizer.transform([cleaned]) - probability = model.predict_proba(vector)[0][1] - prediction = "Spam" if probability >= 0.5 else "Not Spam" - confidence_percentage = round(probability * 100, 2) - except Exception as e: - print("Error predicting:", e) - prediction = "Error" - confidence_percentage = 0 - - return render_template( - "logistic.html", - prediction=prediction, - confidence_percentage=confidence_percentage, - cleaned=cleaned, - tokens=tokens, - probability=round(probability, 4) if probability else None, - source="sms" - ) - -@app.route('/logistic-sms', methods=['POST']) -def logistic_sms(): - try: - data = request.get_json() - msg = data.get('message', '') - cleaned = clean_text(msg) - tokens = cleaned.split() - - vector = 
vectorizer.transform([cleaned]) - probability = model.predict_proba(vector)[0][1] - prediction = "Spam" if probability >= 0.5 else "Not Spam" - confidence_percentage = round(probability * 100, 2) - - return jsonify({ - "prediction": prediction, - "confidence": confidence_percentage, - "probability": round(probability, 4), - "cleaned": cleaned, - "tokens": tokens, - "source": "json" - }) - - except Exception as e: - print("Error in /logistic-sms:", e) - return jsonify({"error": "Internal server error", "details": str(e)}), 500 - - - -# @app.route("/logistic", methods=["GET", "POST"]) -# def logistic(): -# prediction = None -# error = None -# if request.method == "POST": -# try: -# input_text = request.form.get("message") - -# # Load the vectorizer and logistic model from Models folder -# vectorizer = joblib.load("Models/vectorizer.pkl") -# model = joblib.load("Models/logistic_model.pkl") - -# # Transform input and make prediction -# input_vector = vectorizer.transform([input_text]) -# result = model.predict(input_vector)[0] - -# prediction = "✅ Not Spam" if result == 0 else "🚨 Spam" -# except Exception as e: -# error = str(e) - -# return render_template("logistic.html", prediction=prediction, error=error) - - - - - - -@app.route("/knn") -def knn_visual(): - return render_template("knn.html") - -@app.route('/knn_visual_predict', methods=['POST']) -def knn_visual_predict(): - data = request.get_json() - points = np.array(data['points']) # shape: (N, 3) - test_point = np.array(data['test_point']) # shape: (2,) - k = int(data['k']) - - X = points[:, :2] - y = points[:, 2].astype(int) - - knn = KNeighborsClassifier(n_neighbors=k) - knn.fit(X, y) - pred = knn.predict([test_point])[0] - - dists = np.linalg.norm(X - test_point, axis=1) - neighbor_indices = np.argsort(dists)[:k] - neighbors = X[neighbor_indices] - - return jsonify({ - 'prediction': int(pred), - 'neighbors': neighbors.tolist() - }) - -# 🔷 Route 2: KNN Digit Image Classifier (Upload-based) -@app.route("/knn_image") -def knn_image_page(): - return render_template("knn_image.html") - -from PIL import Image - -@app.route("/predict_image", methods=["POST"]) -def predict_image(): - if "image" not in request.files: - return jsonify({"error": "No image uploaded"}), 400 - - file = request.files["image"] - - try: - # ✅ Use PIL to open image from file bytes - image = Image.open(file.stream).convert("RGB") - image = image.resize((32, 32)) # Resize to match training size - img_array = np.array(image).flatten().reshape(1, -1) - except Exception as e: - return jsonify({"error": f"Invalid image. 
{str(e)}"}), 400 - - # Load model & labels - - # model = load_file("Models/knn_model.pkl") - # label_classes = load_file("Models/lasso_model.pkl") - # model = joblib.load("Models/knn_model.pkl") - # label_classes = np.load("Models/label_classes.npy") - model = load_file("knn_model.pkl") - label_classes = load_file("label_classes.npy") - - # Predict class and get probabilities - probs = model.predict_proba(img_array)[0] - pred_index = np.argmax(probs) - pred_label = label_classes[pred_index] - confidence = round(float(probs[pred_index]) * 100, 2) - - return jsonify({ - "prediction": str(pred_label), - "confidence": f"{confidence}%", - "all_probabilities": { - str(label_classes[i]): round(float(probs[i]) * 100, 2) - for i in range(len(probs)) - } - }) - -@app.route("/rfc") -def random_forest_page(): - return render_template("Random_Forest_Classifier.html") # Your beautiful HTML goes in rfc.html - -@app.route('/rf_visual_predict', methods=['POST']) -def rf_visual_predict(): - try: - data = request.get_json() - print("📦 Incoming JSON data:", data) - - labeled_points = data.get('points') - test_point = data.get('test_point') - - if not labeled_points or not test_point: - return jsonify({"error": "Missing points or test_point"}), 400 - - df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) - X = df[['X1', 'X2']] - y = df['Class'] - - rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42) - rf_model.fit(X, y) - - test_point_np = np.array(test_point).reshape(1, -1) - prediction = int(rf_model.predict(test_point_np)[0]) - - x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 - y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 - xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), - np.linspace(y_min, y_max, 100)) - - Z = rf_model.predict(np.c_[xx.ravel(), yy.ravel()]) - Z = Z.reshape(xx.shape) - - return jsonify({ - 'prediction': prediction, - 'decision_boundary_z': Z.tolist(), - 'decision_boundary_x_coords': xx[0, :].tolist(), - 'decision_boundary_y_coords': yy[:, 0].tolist() - }) - - except Exception as e: - import traceback - print("❌ Exception in /rf_visual_predict:") - traceback.print_exc() # Print full error stack trace - return jsonify({"error": str(e)}), 500 - -@app.route("/liar") -def liar_input_page(): - return render_template("rfc_liar_predict.html") - - - - - - - -@app.route("/ref/liar/predictor", methods=["POST"]) -def liar_predictor(): - try: - data = request.get_json() - statement = data.get("statement", "") - - if not statement: - return jsonify({"success": False, "error": "Missing statement"}), 400 - - try: - # 🔍 LIAR Model Prediction - features = vectorizer.transform([statement]) - prediction = model.predict(features)[0] - - liar_label_map = { - 0: "It can be false 🔥", - 1: "False ❌", - 2: "Mostly false but can be true 🤏", - 3: "Half True 🌓", - 4: "Mostly True 👍", - 5: "True ✅" - } - - prediction_label = liar_label_map.get(int(prediction), "Unknown") - - except ValueError as ve: - if "features" in str(ve): - # Fallback to Gemini API - prediction_label = ask_gemini(statement) - else: - raise ve - - # 🧠 BERT-Based Scientific Check - bert_result = bert_checker(statement)[0] - bert_label = bert_result["label"] - bert_score = round(bert_result["score"] * 100, 2) - - science_label_map = { - "LABEL_0": "✅ Scientifically Possible", - "LABEL_1": "❌ Scientifically Impossible" - } - - scientific_check = f"{science_label_map.get(bert_label, bert_label)} ({bert_score:.2f}%)" - - return jsonify({ - "success": True, - "prediction": prediction_label, - 
"reason": "Predicted from linguistic and content-based patterns, or Gemini fallback.", - "scientific_check": scientific_check - }) - - except Exception as e: - traceback.print_exc() - return jsonify({"success": False, "error": str(e)}), 500 - - - -#svm -@app.route("/svm") -def svm_page(): - return render_template("svm.html") - -@app.route('/svm_visual_predict', methods=['POST']) -def svm_visual_predict(): - data = request.json - labeled_points = data['points'] - test_point = data['test_point'] - svm_type = data['svm_type'] - c_param = float(data['c_param']) - gamma_param = float(data['gamma_param']) # Will be ignored for linear kernel - - df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) - X = df[['X1', 'X2']] - y = df['Class'] - - # 1. Train the SVM Classifier - if svm_type == 'linear': - svm_model = svm.SVC(kernel='linear', C=c_param, random_state=42) - elif svm_type == 'rbf': - svm_model = svm.SVC(kernel='rbf', C=c_param, gamma=gamma_param, random_state=42) - else: - return jsonify({'error': 'Invalid SVM type'}), 400 - - svm_model.fit(X, y) - - # 2. Predict for the test point - test_point_np = np.array(test_point).reshape(1, -1) - prediction = int(svm_model.predict(test_point_np)[0]) - - # 3. Get Support Vectors - # support_vectors_ refers to indices of support vectors - # svc_model.support_vectors_ gives the actual support vectors - support_vectors = svm_model.support_vectors_.tolist() - - # 4. Generate data for the decision boundary - # Create a meshgrid of points to predict across the entire plot area - x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 - y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 - - # Extend range slightly to ensure test point is within boundary if it's an outlier - x_min = min(x_min, test_point_np[0,0] - 1) - x_max = max(x_max, test_point_np[0,0] + 1) - y_min = min(y_min, test_point_np[0,1] - 1) - y_max = max(y_max, test_point_np[0,1] + 1) - - xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), - np.linspace(y_min, y_max, 100)) - - # Predict class for each point in the meshgrid - Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()]) - Z = Z.reshape(xx.shape) - - # Convert numpy arrays to lists for JSON serialization - decision_boundary_z = Z.tolist() - decision_boundary_x_coords = xx[0, :].tolist() - decision_boundary_y_coords = yy[:, 0].tolist() - - return jsonify({ - 'prediction': prediction, - 'decision_boundary_z': decision_boundary_z, - 'decision_boundary_x_coords': decision_boundary_x_coords, - 'decision_boundary_y_coords': decision_boundary_y_coords, - 'support_vectors': support_vectors - }) - - - - - - - -@app.route('/api/explain', methods=['POST']) -def explain(): - # In a real deployed environment, you'd secure your API key. - # For Canvas, it's automatically injected if GEMINI_API_KEY is empty string. - # If running locally and not in Canvas, set GEMINI_API_KEY in your environment variables. 
- if not GEMINI_API_KEY and not os.getenv("FLASK_ENV") == "development": # Allow empty key in dev for local testing - return jsonify({'error': 'Missing API key'}), 500 - - payload = request.get_json() - - try: - response = requests.post( - f"{GEMINI_URL}?key={GEMINI_API_KEY}", - headers={"Content-Type": "application/json"}, - json=payload - ) - response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx) - return jsonify(response.json()) - except requests.exceptions.RequestException as e: - app.logger.error(f"Error calling Gemini API: {e}") # Log the error on the server side - return jsonify({'error': str(e)}), 500 - -@app.route('/decision_tree') -def decision_tree_page(): - # This route serves your Decision Tree visualization page - # Ensure the HTML file name matches (e.g., 'decision_tree_viz.html' or 'decision_tree.html') - return render_template('decision_tree.html') # Check your actual HTML file name here - - -@app.route('/game') -def decision_tree_game(): - """Renders the interactive game page for decision trees.""" - return render_template('decision_tree_game.html') - -@app.route('/dt_visual_predict', methods=['POST']) -def dt_visual_predict(): - try: - data = request.json - labeled_points = data['points'] - test_point = data['test_point'] - max_depth = int(data['max_depth']) - - # Convert labeled_points to a pandas DataFrame - df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) - X = df[['X1', 'X2']] - y = df['Class'] - - # Check if there's enough data to train - if X.empty or len(X) < 2: - return jsonify({'error': 'Not enough data points to train the model.'}), 400 - - # 1. Train the Decision Tree Classifier (This is the "model" part) - dt_model = DecisionTreeClassifier(max_depth=max_depth, random_state=42) - dt_model.fit(X, y) - - # 2. Predict for the test point - test_point_np = np.array(test_point).reshape(1, -1) - prediction = int(dt_model.predict(test_point_np)[0]) - - # 3. 
Generate data for the decision boundary - x_min, x_max = X['X1'].min(), X['X1'].max() - y_min, y_max = X['X2'].min(), X['X2'].max() - - # Add a buffer to the plot range to make sure points are not on the edge - # And handle cases where min == max (e.g., all points have same X1 value) - x_buffer = 1.0 if (x_max - x_min) == 0 else (x_max - x_min) * 0.1 - y_buffer = 1.0 if (y_max - y_min) == 0 else (y_max - y_min) * 0.1 - - x_min -= x_buffer - x_max += x_buffer - y_min -= y_buffer - y_max += y_buffer - - # Ensure test point is also comfortably within the range - x_min = min(x_min, test_point_np[0,0] - 0.5) - x_max = max(x_max, test_point_np[0,0] + 0.5) - y_min = min(y_min, test_point_np[0,1] - 0.5) - y_max = max(y_max, test_point_np[0,1] + 0.5) - - # Create a meshgrid for plotting the decision boundary - xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), - np.linspace(y_min, y_max, 100)) - - # Predict class for each point in the meshgrid using the trained model - Z = dt_model.predict(np.c_[xx.ravel(), yy.ravel()]) - Z = Z.reshape(xx.shape) - - # Convert numpy arrays to lists for JSON serialization - decision_boundary_z = Z.tolist() - decision_boundary_x_coords = xx[0, :].tolist() - decision_boundary_y_coords = yy[:, 0].tolist() - - return jsonify({ - 'prediction': prediction, - 'decision_boundary_z': decision_boundary_z, - 'decision_boundary_x_coords': decision_boundary_x_coords, - 'decision_boundary_y_coords': decision_boundary_y_coords - }) - except Exception as e: - # This will print the actual error to your terminal - print(f"An error occurred in /dt_visual_predict: {e}") - # Return a more informative error message to the frontend - return jsonify({'error': f'Backend Error: {str(e)}. Check server console for details.'}), 500 - - # --- Naive Bayes Routes --- - -from urllib.parse import urlparse -from sklearn.naive_bayes import GaussianNB -from nltk.corpus import words - -nb_model = load_file("nb_url_model.pkl") -vectorizer = load_file("nb_url_vectorizer.pkl") - -# if nb_model is not None and vectorizer is not None: -# print("✅ Loaded Naive Bayes URL model") -# else: -# nb_model, vectorizer = None, None -# print("❌ vectorizer not found") - - - -@app.route('/nb_spam') -def nb_spam_page(): - return render_template('NB_spam.html') - - -import re -from urllib.parse import urlparse -from spellchecker import SpellChecker -import wordninja - - - -# ---- Whitelist (your full one, unchanged) ---- -whitelist = set([ - # Search Engines - 'google', 'bing', 'yahoo', 'duckduckgo', 'baidu', 'ask', - - # Social Media - 'facebook', 'instagram', 'twitter', 'linkedin', 'snapchat', 'tiktok', - 'threads', 'pinterest', 'reddit', 'quora', - - # Communication Tools - 'whatsapp', 'telegram', 'skype', 'zoom', 'meet', 'discord', - 'teams', 'signal', 'messenger', - - # Global E-commerce - 'amazon', 'ebay', 'shopify', 'alibaba', 'walmart', 'target', - 'etsy', 'shein', 'bestbuy', 'costco', 'newegg', - - # Indian E-commerce / Services - 'flipkart', 'myntra', 'ajio', 'nykaa', 'meesho', 'snapdeal', - 'paytm', 'phonepe', 'mobikwik', 'zomato', 'swiggy', 'ola', 'uber', 'bookmyshow', - 'ixigo', 'makemytrip', 'yatra', 'redbus', 'bigbasket', 'grofers', 'blinkit', - 'universalcollegeofengineering', - - # Education / Productivity - 'youtube', 'docs', 'drive', 'calendar', 'photos', 'gmail', 'notion', - 'edx', 'coursera', 'udemy', 'khanacademy', 'byjus', 'unacademy', - - # News / Media / Tech - 'bbc', 'cnn', 'nyt', 'forbes', 'bloomberg', 'reuters', - 'ndtv', 'indiatimes', 'thehindu', 'hindustantimes', 'indiatoday', - 'techcrunch', 
'verge', 'wired', - - # Streaming / Entertainment - 'netflix', 'hotstar', 'primevideo', 'spotify', 'gaana', 'wynk', 'saavn', 'voot', - - # Dev & Tools - 'github', 'stackoverflow', 'medium', 'gitlab', 'bitbucket', - 'adobe', 'figma', 'canva', - - # Financial / Banking - 'hdfcbank', 'icicibank', 'sbi', 'axisbank', 'kotak', 'boi', 'upi', - 'visa', 'mastercard', 'paypal', 'stripe', 'razorpay', 'phonepe', 'paytm', - - # Government / Utilities - 'gov', 'nic', 'irctc', 'uidai', 'mygov', 'incometax', 'aadhar', 'rbi', - - # Others Common - 'airtel', 'jio', 'bsnl', 'vi', 'speedtest', 'cricbuzz', 'espn', 'espncricinfo', - 'wikipedia', 'mozilla', 'opera', 'chrome', 'android', 'apple', 'windows', 'microsoft' -]) - - # ... your full whitelist from before ... - - -# ---- Trusted & Bad TLDs ---- -trusted_tlds = [ - '.gov', '.nic.in', '.edu', '.ac.in', '.mil', '.org', '.int', - '.co.in', '.gov.in', '.res.in', '.net.in', '.nic.gov.in' -] - -# Expanded Bad TLDs (Rule 4) -bad_tlds = [ - '.xyz', '.tk', '.ml', '.ga', '.cf', '.top', '.gq', '.cn', - '.ru', '.pw', '.bid', '.link', '.loan', '.party', '.science', - '.stream', '.webcam', '.online', '.site', '.website', '.space', - '.club', '.buzz', '.info' -] - -# Suspicious extensions (Rule 13) -suspicious_extensions = ['.exe', '.zip', '.rar', '.js', '.php', '.asp', '.aspx', '.jsp', '.sh'] - -# Phishing keywords (Rule 11, your full list) -phishing_keywords = [ - 'login', 'verify', 'secure', 'account', 'update', 'confirm', 'authenticate', - 'free', 'bonus', 'offer', 'prize', 'winner', 'gift', 'coupon', 'discount', - 'bank', 'paypal', 'creditcard', 'mastercard', 'visa', 'amex', 'westernunion', - 'signin', 'click', 'password', 'unlock', 'recover', 'validate', 'urgency', - 'limitedtime', 'expires', 'suspicious', 'alert', 'important', 'actionrequired' -] - -# ---- Rules 5–14 ---- -rules = { - 5: r"https?://\d{1,3}(\.\d{1,3}){3}", - 6: r"@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", - 7: r"(free money|win now|click here)", - 8: r"https?://[^\s]*\.(ru|cn|tk)", - 9: r"https?://.{0,6}\..{2,6}/.{0,6}", - 10: r"[0-9]{10,}", - 12: r"https?://[^\s]*@[^\s]+", - 13: r"https?://[^\s]*//[^\s]+", - 14: r"https?://[^\s]*\?(?:[^=]+=[^&]*&){5,}", -} - - -# ---- Gibberish Check Helper (Rule 15) ---- -def is_gibberish_word(word): - vowels = "aeiou" - v_count = sum(c in vowels for c in word) - return v_count / len(word) < 0.25 - -# # ---- Utility: Extract words from URL ---- -# def extract_words(url): -# parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) -# raw = parsed.netloc.replace('-', '') + parsed.path.replace('-', '') -# # Split using wordninja -# words = wordninja.split(raw.lower()) -# # Keep only alphabetic words of length >= 3 -# words = [w for w in words if w.isalpha() and len(w) >= 3] -# return words -# ---- Extract words from URL ---- -def extract_words(url): - parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) - parts = re.split(r'\W+', parsed.netloc + parsed.path) - final_words = [] - for word in parts: - if len(word) > 2 and word.isalpha(): - split_words = wordninja.split(word.lower()) - if len(split_words) <= 1: - split_words = [word.lower()] - final_words.extend(split_words) - return final_words - - -# --- Your original predict function, now inside the Flask app --- -@app.route("/predict", methods=["POST"]) -def predict(): - try: - data = request.get_json() - url = data.get("url", "").lower() - if not url: - return jsonify({'error': 'No URL provided'}), 400 - - parsed = urlparse(url if url.startswith(("http://", 
"https://")) else "http://" + url) - path = parsed.path - - # ---- SpellChecker using built-in dictionary ---- - spell = SpellChecker(distance=1) - - # ---- Extract words and check spelling ---- - words = extract_words(url) - # ignore known TLDs - tlds_to_ignore = [tld.replace('.', '',"/") for tld in trusted_tlds + bad_tlds] - words_for_spellcheck = [w for w in words if w not in tlds_to_ignore] - - misspelled = spell.unknown(words_for_spellcheck) - steps = [{"word": w, "valid": (w not in misspelled) or (w in tlds_to_ignore)} for w in words] - - if misspelled: - return jsonify({ - "prediction": 1, - "reason": f"🧾 Spelling errors: {', '.join(misspelled)}", - "steps": steps - }) - else: - return jsonify({ - "prediction": 0, - "reason": "✅ No spelling issues", - "steps": steps - }) - - except Exception as e: - return jsonify({'error': f"An issue occurred during spell checking: {str(e)}"}), 500 - - - - -@app.route('/naive_bayes') -def naive_bayes_page(): - return render_template('naive_bayes_viz.html') - - # --- New Naive Bayes Prediction Route --- -@app.route('/nb_visual_predict', methods=['POST']) -def nb_visual_predict(): - try: - data = request.json - labeled_points = data['points'] - test_point = data['test_point'] - - df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) - X = df[['X1', 'X2']] - y = df['Class'] - - # Ensure enough data and at least two classes for classification - if X.empty or len(X) < 2: - return jsonify({'error': 'Not enough data points to train the model.'}), 400 - if len(y.unique()) < 2: - return jsonify({'error': 'Need at least two different classes to classify.'}), 400 - - # Train Gaussian Naive Bayes Model - # GaussianNB is suitable for continuous data - nb_model = GaussianNB() - nb_model.fit(X, y) - - # Predict for the test point - test_point_np = np.array(test_point).reshape(1, -1) - prediction = int(nb_model.predict(test_point_np)[0]) - - # Generate data for the decision boundary - x_min, x_max = X['X1'].min(), X['X1'].max() - y_min, y_max = X['X2'].min(), X['X2'].max() - - x_buffer = 1.0 if x_max - x_min == 0 else (x_max - x_min) * 0.1 - y_buffer = 1.0 if y_max - y_min == 0 else (y_max - y_min) * 0.1 - - x_min -= x_buffer - x_max += x_buffer - y_min -= y_buffer - y_max += y_buffer - - x_min = min(x_min, test_point_np[0,0] - 0.5) - x_max = max(x_max, test_point_np[0,0] + 0.5) - y_min = min(y_min, test_point_np[0,1] - 0.5) - y_max = max(y_max, test_point_np[0,1] + 0.5) - - xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), - np.linspace(y_min, y_max, 100)) - - if xx.size == 0 or yy.size == 0: - return jsonify({'error': 'Meshgrid could not be created. Data range too narrow.'}), 400 - - # Predict class for each point in the meshgrid - # Use predict_proba and then argmax to get class for decision boundary coloring - Z = nb_model.predict(np.c_[xx.ravel(), yy.ravel()]) - Z = Z.reshape(xx.shape) - - decision_boundary_z = Z.tolist() - decision_boundary_x_coords = xx[0, :].tolist() - decision_boundary_y_coords = yy[:, 0].tolist() - - return jsonify({ - 'prediction': prediction, - 'decision_boundary_z': decision_boundary_z, - 'decision_boundary_x_coords': decision_boundary_x_coords, - 'decision_boundary_y_coords': decision_boundary_y_coords - }) - except Exception as e: - print(f"An error occurred in /nb_visual_predict: {e}") - return jsonify({'error': f'Backend Error: {str(e)}. 
Check server console for details.'}), 500 - -def check_with_virustotal(url): - try: - headers = {"x-apikey": VT_API_KEY} - submit_url = "https://www.virustotal.com/api/v3/urls" - - # Submit the URL for scanning - response = requests.post(submit_url, headers=headers, data={"url": url}) - url_id = response.json()["data"]["id"] - - # Fetch result - result = requests.get(f"{submit_url}/{url_id}", headers=headers) - data = result.json() - - stats = data["data"]["attributes"]["last_analysis_stats"] - malicious_count = stats.get("malicious", 0) - - if malicious_count > 0: - return True, f"☣️ VirusTotal flagged it as malicious ({malicious_count} engines)" - return False, None - except Exception as e: - print(f"⚠️ VirusTotal error: {e}") - - - - return False, None - - - - - - - - - - -@app.route('/kmeans-clustering') -def clustering(): - return render_template('clustering.html') - -#image code -@app.route('/kmeans-Dbscan-image', methods=['GET', 'POST']) -def compress_and_clean(): - final_image = None - - if request.method == 'POST': - try: - # Get form values - mode = request.form.get('mode', 'compress') - k = int(request.form.get('k', 8)) - eps = float(request.form.get('eps', 0.6)) - min_samples = int(request.form.get('min_samples', 50)) - image_file = request.files.get('image') - - if image_file and image_file.filename != '': - # Load image - img = Image.open(image_file).convert('RGB') - max_size = (518, 518) - img.thumbnail(max_size, Image.Resampling.LANCZOS) - - img_np = np.array(img) - h, w, d = img_np.shape - pixels = img_np.reshape(-1, d) - - # Apply KMeans - kmeans = KMeans(n_clusters=k, random_state=42, n_init=10) - kmeans.fit(pixels) - clustered_pixels = kmeans.cluster_centers_[kmeans.labels_].astype(np.uint8) - - # Mode 1: Just Compress - if mode == 'compress': - final_pixels = clustered_pixels.reshape(h, w, d) - - # Mode 2: Compress + Clean (KMeans + DBSCAN) - else: - # Sample to avoid MemoryError - max_dbscan_pixels = 10000 - if len(clustered_pixels) > max_dbscan_pixels: - idx = np.random.choice(len(clustered_pixels), max_dbscan_pixels, replace=False) - dbscan_input = clustered_pixels[idx] - else: - dbscan_input = clustered_pixels - - # DBSCAN - # For DBSCAN: use only 10,000 pixels max - max_dbscan_pixels = 10000 - - scaler = StandardScaler() - pixels_scaled = scaler.fit_transform(dbscan_input) - db = DBSCAN(eps=eps, min_samples=min_samples) - labels = db.fit_predict(pixels_scaled) - - # Clean noisy pixels - clean_pixels = [] - for i in range(len(dbscan_input)): - label = labels[i] - clean_pixels.append([0, 0, 0] if label == -1 else dbscan_input[i]) - - # Fill extra if sampling was used - if len(clustered_pixels) > max_dbscan_pixels: - clean_pixels.extend([[0, 0, 0]] * (len(clustered_pixels) - len(clean_pixels))) - - final_pixels = np.array(clean_pixels, dtype=np.uint8).reshape(h, w, d) - - # Save final image - final_img = Image.fromarray(final_pixels) - final_image = 'compressed_clean.jpg' - final_img.save(os.path.join(app.config['UPLOAD_FOLDER'], final_image), optimize=True, quality=90) - - except Exception as e: - return f"⚠️ Error: {str(e)}", 500 - - return render_template('kmean-dbscan-image.html', final_image=final_image) - -@app.route('/DBscan') -def DBSCAN(): - return render_template('DBSCAN.html') - - -if __name__ == "__main__": - app.run(host="0.0.0.0", port=7860) - - - - - +from flask import Flask, render_template, request, jsonify +import numpy as np +import pandas as pd +import joblib +import os +from sklearn.svm import SVR +from sklearn.model_selection import 
train_test_split +from sklearn.metrics import mean_squared_error, r2_score +from sklearn.neighbors import KNeighborsClassifier +from sklearn.preprocessing import StandardScaler +from sklearn.ensemble import RandomForestClassifier +from sklearn.tree import DecisionTreeClassifier +from sklearn import svm +from sklearn.naive_bayes import GaussianNB # <--- Add this import +from sklearn.feature_extraction.text import CountVectorizer +from textblob import TextBlob +import traceback +from flask_cors import CORS +from werkzeug.utils import secure_filename # For secure file names +import io # To read CSV from memory +import re +from sklearn.cluster import KMeans, DBSCAN +from PIL import Image +import matplotlib.pyplot as plt +from joblib import load # ✅ This is the missing line +import traceback +import pickle +from sklearn.svm import SVC +from sklearn.datasets import make_classification +import plotly.graph_objs as go +import json +import requests +from PIL import Image + + +# from transformers import pipeline +from dotenv import load_dotenv +import os +from urllib.parse import urlparse +import tldextract +import string + + +# from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline + +# model_name = "microsoft/deberta-v3-small" + +# tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) +# model = AutoModelForSequenceClassification.from_pretrained(model_name) + +# bert_checker = pipeline("text-classification", model=model, tokenizer=tokenizer) + +# Load environment variables from .env +load_dotenv() +#spam url import relateted +import nltk, os + +# Tell NLTK to also check the local nltk_data folder +nltk.data.path.append(os.path.join(os.path.dirname(__file__), "nltk_data")) + +from nltk.corpus import words + +# Load the words corpus +valid_words = set(words.words()) +print("engineering" in valid_words) # ✅ Should be True +print("engineerigfnnxng" in valid_words) # ❌ Should be False +import wordninja # Function to split words into valid parts +import re +from urllib.parse import urlparse +from spellchecker import SpellChecker + +import wordninja +# end urlspam +import google.generativeai as genai + +# app.py +# import streamlit as st +# from load_file import load_file + +# st.title("Download HuggingFace Repo Files in Streamlit") + +# filename = st.text_input("Enter filename from repo:", "model.safetensors") + +# if st.button("Download"): +# try: +# local_path = load_file(filename) +# st.success(f"✅ File downloaded to: {local_path}") +# st.write("You can now use this file in your app.") +# except Exception as e: +# st.error(f"❌ Error: {str(e)}") + + +# Set API key (no need to assign OpenAI() to client like that) +# openai.api_key = os.getenv("OPENAI_API_KEY") + +# def ask_openai_scientific_validation(statement): +# prompt = f"""Assess the scientific accuracy of: "{statement}"\nRespond with ✅ (possible) or ❌ (impossible), and explain simply.""" + +# try: +# client = OpenAI() # This is correct placement +# response = client.chat.completions.create( +# model="gpt-3.5-turbo", +# messages=[ +# {"role": "system", "content": "You are a scientific fact-checker."}, +# {"role": "user", "content": prompt} +# ], +# temperature=0.7, +# max_tokens=150 +# ) + + +# return response.choices[0].message.content.strip() + +# except Exception as e: +# return f"⚠️ Could not verify:\n\n{str(e)}" + + + #huggung face code start +REPO_ID = "deedrop1140/nero-ml" +MODEL_DIR = "Models" + +def load_file(filename): + """Try to load model from local folder; if missing, download from 
Hugging Face Hub.""" + local_path = os.path.join(MODEL_DIR, filename) + + # 1️⃣ Check if file exists locally + if os.path.exists(local_path): + file_path = local_path + else: + # 2️⃣ Download from Hugging Face (Render case) + file_path = hf_hub_download(repo_id=REPO_ID, filename=filename) + + # 3️⃣ Load based on file extension + if filename.endswith((".pkl", ".joblib")): + return joblib.load(file_path) + elif filename.endswith(".npy"): + return np.load(file_path, allow_pickle=True) + elif filename.endswith((".pt", ".pth")): + return torch.load(file_path, map_location="cpu") + else: + return file_path + +# # ===================== +# # Replace your old model loads with this: +# # ===================== + +# # Models +# knn_model = load_file("Models/knn_model.pkl") +# lasso_model = load_file("Models/lasso_model.pkl") +# liar_model = load_file("Models/liar_model.joblib") +# linear_model = load_file("Models/linear_model.pkl") +# logistic_model = load_file("Models/logistic_model.pkl") +# nb_url_model = load_file("Models/nb_url_model.pkl") +# poly_model = load_file("Models/poly_model.pkl") +# rf_model = load_file("Models/rf_model.pkl") +# ridge_model = load_file("Models/ridge_model.pkl") +# supervised_model = load_file("Models/supervised_model.pkl") +# svr_model = load_file("Models/svr_model.pkl") +# voting_url_model = load_file("Models/voting_url_model.pkl") + +# # Vectorizers / Encoders / Scalers +# label_classes = load_file("Models/label_classes.npy") +# label_encoder = load_file("Models/label_encoder.pkl") +# lasso_scaler = load_file("Models/lasso_scaler.pkl") +# liar_vectorizer = load_file("Models/liar_vectorizer.joblib") +# nb_url_vectorizer = load_file("Models/nb_url_vectorizer.pkl") +# poly_transform = load_file("Models/poly_transform.pkl") +# ridge_scaler = load_file("Models/ridge_scaler.pkl") +# svr_scaler_X = load_file("Models/svr_scaler_X.pkl") +# svr_scaler_y = load_file("Models/svr_scaler_y.pkl") +# tfidf_vectorizer = load_file("Models/tfidf_vectorizer.pkl") +# url_vectorizer = load_file("Models/url_vectorizer.pkl") +# vectorizer_joblib = load_file("Models/vectorizer.joblib") +# vectorizer_pkl = load_file("Models/vectorizer.pkl") +# # huggung face code end + +MODEL_DIR = "Models" +DATA_DIR = "housedata" # Assuming your house data is here +UPLOAD_FOLDER = 'static/uploads' # NEW: Folder for temporary user uploads + +app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER +CORS(app) + + + +genai.configure(api_key=os.getenv("GEMINI_API_KEY")) + +def ask_gemini(statement): + model = genai.GenerativeModel("gemini-2.0-flash-001") + response = model.generate_content(f"Verify this statement for truth: {statement}") + return response.text + +#rfc +# model = load("Models/liar_model.joblib") +# vectorizer = load("Models/liar_vectorizer.joblib") + +# Load BERT fact-checker pipeline (local model) +# bert_checker = pipeline("text-classification", model="microsoft/deberta-v3-small") + +#endrfc + +#svm + +# ==== SVM Setup ==== +X, y = make_classification(n_samples=100, n_features=2, n_redundant=0, + n_clusters_per_class=1, n_classes=2, random_state=42) +scaler = StandardScaler() +X = scaler.fit_transform(X) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + +# Train SVM +svm_model = SVC(kernel="linear") +svm_model.fit(X_train, y_train) + +#endsvm +#deision tree +GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") +GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" +#end deision tree + +# Ensure 
directories exist +os.makedirs(MODEL_DIR, exist_ok=True) +os.makedirs(DATA_DIR, exist_ok=True) +os.makedirs(UPLOAD_FOLDER, exist_ok=True) # NEW: Create upload folder + +def clean_text(text): + if pd.isnull(text): + return "" + text = text.lower() + text = re.sub(r"http\S+|www\S+|https\S+", '', text) + text = text.translate(str.maketrans('', '', string.punctuation)) + text = re.sub(r'\d+', '', text) + text = re.sub(r'\s+', ' ', text).strip() + return text + +# --- Helper functions for data generation (conceptual for demo) --- +def generate_linear_data(n_samples=100, noise=0.5): + X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) + y = 2 * X.squeeze() + 5 + noise * np.random.randn(n_samples) + return X, y + +def generate_non_linear_data(n_samples=100, noise=0.5): + X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) + y = np.sin(X.squeeze()) * 10 + noise * np.random.randn(n_samples) + return X, y + +def generate_noisy_data(n_samples=100, noise_factor=3.0): + X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) + y = 2 * X.squeeze() + 5 + noise_factor * np.random.randn(n_samples) # Increased noise + return X, y + +# Function to generate house price data (using your existing data structure for consistency) +def get_house_data(): + try: + df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv')) + # Using a subset of features for simplicity in demo + features = ['GrLivArea', 'OverallQual', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] + # Check if all required columns exist + if not all(col in df.columns for col in features + ['SalePrice']): + print("Warning: Missing one or more required columns in train.csv for house data.") + return None, None + X = df[features] + y = df['SalePrice'] + return X, y + except FileNotFoundError: + print(f"Error: train.csv not found in {DATA_DIR}. Please ensure your data is there.") + return None, None + except Exception as e: + print(f"Error loading house data: {e}") + return None, None + +# Dictionary to hold all loaded models +loaded_models = {} + +# Load logistic model and vectorizer for SMS +# vectorizer = joblib.load("Models/logvectorizer.pkl") +# model = joblib.load("Models/logistic_model.pkl") +# vectorizer = load_file("Models/logvectorizer.pkl") +# model = load_file("Models/logistic_model.pkl") + + +# # Load models once NB+DT+SVM is trained +# try: +# model = load_file("Models/logistic_model.pkl") +# # vectorizer = joblib.load("Models/logvectorizer.pkl") +# # model = joblib.load("Models/logistic_model.pkl") +# vectorizer = load_file("Models/vectorizer.pkl") +# print("✅ Model and vectorizer loaded into memory successfully!") +# except Exception as e: +# vectorizer = None +# model = None +# print(f"❌ Error: Could not load model or vectorizer. Please check your file paths. Error: {e}") +# #END NB+DT+SVM + +# === Naive Bayes URL Spam Classifier (NB_spam.html) === +# === Load Model & Vectorizer === + + + +# VT_API_KEY = os.getenv("VT_API_KEY") +# nb_model = load_file("Models/nb_url_model.pkl") +# vectorizer = load_file("Models/nb_url_vectorizer.pkl") + +# if nb_model is not None and vectorizer is not None: +# print("✅ Loaded model and vectorizer.") +# else: +# print("❌ Model or vectorizer not found.") + + + + + + +def load_all_models(): + """ + Loads all necessary models into the loaded_models dictionary when the app starts. 
+ """ + global loaded_models + + # Load Supervised Model + # Load Supervised Model +try: + supervised_model_path = load_file("linear_model.pkl") + + # Debug: check what load_file actually returned + print("DEBUG -> supervised_model_path type:", type(supervised_model_path)) + + # If load_file returned a path (string), load with joblib + if isinstance(supervised_model_path, str): + loaded_models['supervised'] = joblib.load(supervised_model_path) + else: + # If load_file already returned the model object + loaded_models['supervised'] = supervised_model_path + + print("Supervised model loaded successfully") + +except FileNotFoundError: + print(f"Error: Supervised model file not found at {supervised_model_path}. " + "Please run train_model.py first.") + loaded_models['supervised'] = None # Mark as not loaded +except Exception as e: + print(f"Error loading supervised model: {e}") + loaded_models['supervised'] = None + + +# Load models when Flask app context is ready +with app.app_context(): + load_all_models() + +@app.route('/') +def frontpage(): + return render_template('frontpage.html') +@app.route('/home') +def home(): + return render_template('home.html') + +@app.route('/supervise') +def supervise(): + return render_template('supervise.html', active_page='supervise') + + +@app.route('/unsupervised') +def unsupervised(): + return render_template('unsupervised.html', active_page='unsupervised') + +# Semi-Supervised Learning page +@app.route('/semi-supervised') +def semi_supervised(): + return render_template('semi_supervised.html', active_page='semi_supervised') + +# Reinforcement Learning page +@app.route('/reinforcement') +def reinforcement(): + return render_template('reinforcement.html', active_page='reinforcement') + +# Ensemble Learning page +@app.route('/ensemble') +def ensemble(): + return render_template('ensemble.html', active_page='ensemble') + + +@app.route('/supervised', methods=['GET', 'POST']) +def supervised(): + prediction = None + hours_studied_input = None + + if loaded_models['supervised'] is None: + return "Error: Supervised model could not be loaded. Please check server logs.", 500 + + if request.method == 'POST': + try: + hours_studied_input = float(request.form['hours']) + input_data = np.array([[hours_studied_input]]) + + predicted_score = loaded_models['supervised'].predict(input_data)[0] + prediction = round(predicted_score, 2) + + except ValueError: + print("Invalid input for hours studied.") + prediction = "Error: Please enter a valid number." + except Exception as e: + print(f"An error occurred during prediction: {e}") + prediction = "Error during prediction." 
+ + return render_template('supervised.html', prediction=prediction, hours_studied_input=hours_studied_input) + + +@app.route('/polynomial', methods=['GET', 'POST']) +def polynomial(): + if request.method == 'POST': + try: + hours = float(request.form['hours']) + + # model = joblib.load('Models/poly_model.pkl') + # poly = joblib.load('Models/poly_transform.pkl') + # model = load_file("Models/poly_model.pkl") + # poly= load_file("Models/poly_transform.pkl") + model = load_file("poly_model.pkl") + poly= load_file("poly_transform.pkl") + + transformed_input = poly.transform([[hours]]) + prediction = model.predict(transformed_input)[0] + + return render_template("poly.html", prediction=round(prediction, 2), hours=hours) + + except Exception as e: + print(f"Error: {e}") + return render_template("poly.html", error="Something went wrong.") + + return render_template("poly.html") + + +@app.route('/random_forest', methods=['GET', 'POST']) +def random_forest(): + if request.method == 'POST': + try: + hours = float(request.form['hours']) + model = load_file("rf_model.pkl") + # model = joblib.load('Models/rf_model.pkl') + prediction = model.predict([[hours]])[0] + + return render_template("rf.html", prediction=round(prediction, 2), hours=hours) + except Exception as e: + print(f"[ERROR] {e}") + return render_template("rf.html", error="Prediction failed. Check your input.") + return render_template("rf.html") + +@app.route('/prediction_flow') +def prediction_flow(): + return render_template('prediction_flow.html') + +@app.route("/lasso", methods=["GET", "POST"]) +def lasso(): + if request.method == "POST": + try: + inputs = [float(request.form.get(f)) for f in ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt']] + + # model = load_file("Models/lasso_model.pkl") + # scaler = load_file("Models/lasso_scaler.pkl") + # model = joblib.load("Models/lasso_model.pkl") + # scaler = joblib.load("Models/lasso_scaler.pkl") + model = load_file("lasso_model.pkl") + scaler = load_file("lasso_scaler.pkl") + + scaled_input = scaler.transform([inputs]) + + prediction = model.predict(scaled_input)[0] + return render_template("lasso.html", prediction=round(prediction, 2)) + + except Exception as e: + return render_template("lasso.html", error=str(e)) + + return render_template("lasso.html") + + +@app.route('/ridge', methods=['GET', 'POST']) +def ridge(): + prediction = None + error = None + + try: + # model = load_file("Models/ridge_model.pkl") + # scaler = load_file("Models/ridge_scaler.pkl") + # model = joblib.load(os.path.join(MODEL_DIR, 'ridge_model.pkl')) + # scaler = joblib.load(os.path.join(MODEL_DIR, 'ridge_scaler.pkl')) + + model = load_file("ridge_model.pkl") + scaler = load_file("ridge_scaler.pkl") + + + except Exception as e: + return f"❌ Error loading Ridge model: {e}", 500 + + if request.method == 'POST': + try: + features = ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] + input_data = [float(request.form[feature]) for feature in features] + input_scaled = scaler.transform([input_data]) + prediction = model.predict(input_scaled)[0] + except Exception as e: + error = str(e) + + return render_template('ridge.html', prediction=prediction, error=error) + +@app.route('/dtr', methods=['GET', 'POST']) +def dtr(): + if request.method == 'GET': + return render_template('dtr.html') + + if request.method == 'POST': + data = request.get_json() + data_points = data.get('dataPoints') if data else None + print("Received data:", data_points) + return jsonify({'message': 'Data 
received successfully!', 'receivedData': data_points}) + + +@app.route('/dtrg') +def drg(): + return render_template('desiciongame.html') + +# --- SVR Routes --- +@app.route('/svr') # This route is for the initial GET request to load the page +def svr_page(): + return render_template('svr.html') + +@app.route('/decision-tree') +def decision_tree(): + return render_template('decision-Tree.html') + +@app.route('/decision-tree-game') +def decision_tree_game(): + return render_template('Decision-Tree-Game.html') + + +@app.route('/run_svr_demo', methods=['POST']) +def run_svr_demo(): + try: + # Check if the request contains JSON (for predefined datasets) or FormData (for file uploads) + if request.is_json: + data = request.json + else: + # For FormData, data is accessed via request.form for fields, request.files for files + data = request.form + + dataset_type = data.get('dataset_type', 'linear') + kernel_type = data.get('kernel', 'rbf') + C_param = float(data.get('C', 1.0)) + gamma_param = float(data.get('gamma', 0.1)) + epsilon_param = float(data.get('epsilon', 0.1)) + + X, y = None, None + + if dataset_type == 'linear': + X, y = generate_linear_data() + elif dataset_type == 'non_linear': + X, y = generate_non_linear_data() + elif dataset_type == 'noisy': + X, y = generate_noisy_data() + elif dataset_type == 'house_data': + X_house, y_house = get_house_data() + if X_house is not None and not X_house.empty: + X = X_house[['GrLivArea']].values # Only GrLivArea for simple 1D plotting + y = y_house.values + else: + X, y = generate_linear_data() # Fallback if house data is missing/invalid + elif dataset_type == 'custom_csv': # NEW: Handle custom CSV upload + uploaded_file = request.files.get('file') + x_column_name = data.get('x_column_name') + y_column_name = data.get('y_column_name') + + if not uploaded_file or uploaded_file.filename == '': + return jsonify({'error': 'No file uploaded for custom CSV.'}), 400 + if not x_column_name or not y_column_name: + return jsonify({'error': 'X and Y column names are required for custom CSV.'}), 400 + + try: + # Read CSV into a pandas DataFrame from in-memory BytesIO object + df = pd.read_csv(io.BytesIO(uploaded_file.read())) + + if x_column_name not in df.columns or y_column_name not in df.columns: + missing_cols = [] + if x_column_name not in df.columns: missing_cols.append(x_column_name) + if y_column_name not in df.columns: missing_cols.append(y_column_name) + return jsonify({'error': f"Missing columns in uploaded CSV: {', '.join(missing_cols)}"}), 400 + + X = df[[x_column_name]].values # Ensure X is 2D for scikit-learn + y = df[y_column_name].values + except Exception as e: + return jsonify({'error': f"Error reading or processing custom CSV: {str(e)}"}), 400 + else: # Fallback for unknown dataset types + X, y = generate_linear_data() + + + if X is None or y is None or len(X) == 0: + return jsonify({'error': 'Failed to generate or load dataset.'}), 500 + + # Scale data + scaler_X = StandardScaler() + scaler_y = StandardScaler() + + X_scaled = scaler_X.fit_transform(X) + y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten() + + X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42) + + # Train SVR model + svr_model = SVR(kernel=kernel_type, C=C_param, gamma=gamma_param, epsilon=epsilon_param) + svr_model.fit(X_train, y_train) + + # Make predictions + y_pred_scaled = svr_model.predict(X_test) + + # Inverse transform predictions to original scale for metrics + y_pred = 
scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten() + y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten() + + # Calculate metrics + mse = mean_squared_error(y_test_original, y_pred) + r2 = r2_score(y_test_original, y_pred) + support_vectors_count = len(svr_model.support_vectors_) + + # Prepare data for plotting + plot_X_original = scaler_X.inverse_transform(X_scaled) + plot_y_original = scaler_y.inverse_transform(y_scaled.reshape(-1, 1)).flatten() + + x_plot = np.linspace(plot_X_original.min(), plot_X_original.max(), 500).reshape(-1, 1) + x_plot_scaled = scaler_X.transform(x_plot) + y_plot_scaled = svr_model.predict(x_plot_scaled) + y_plot_original = scaler_y.inverse_transform(y_plot_scaled.reshape(-1, 1)).flatten() + + y_upper_scaled = y_plot_scaled + epsilon_param + y_lower_scaled = y_plot_scaled - epsilon_param + y_upper_original = scaler_y.inverse_transform(y_upper_scaled.reshape(-1, 1)).flatten() + y_lower_original = scaler_y.inverse_transform(y_lower_scaled.reshape(-1, 1)).flatten() + + plot_data = { + 'data': [ + { + 'x': plot_X_original.flatten().tolist(), + 'y': plot_y_original.tolist(), + 'mode': 'markers', + 'type': 'scatter', + 'name': 'Original Data' + }, + { + 'x': x_plot.flatten().tolist(), + 'y': y_plot_original.tolist(), + 'mode': 'lines', + 'type': 'scatter', + 'name': 'SVR Prediction', + 'line': {'color': 'red'} + }, + { + 'x': x_plot.flatten().tolist(), + 'y': y_upper_original.tolist(), + 'mode': 'lines', + 'type': 'scatter', + 'name': 'Epsilon Tube (Upper)', + 'line': {'dash': 'dash', 'color': 'green'}, + 'fill': 'tonexty', + 'fillcolor': 'rgba(0,128,0,0.1)' + }, + { + 'x': x_plot.flatten().tolist(), + 'y': y_lower_original.tolist(), + 'mode': 'lines', + 'type': 'scatter', + 'name': 'Epsilon Tube (Lower)', + 'line': {'dash': 'dash', 'color': 'green'} + } + ], + 'layout': { + 'title': f'SVR Regression (Kernel: {kernel_type.upper()})', + 'xaxis': {'title': 'Feature Value'}, + 'yaxis': {'title': 'Target Value'}, + 'hovermode': 'closest' + } + } + + return jsonify({ + 'mse': mse, + 'r2_score': r2, + 'support_vectors_count': support_vectors_count, + 'plot_data': plot_data + }) + + except Exception as e: + print(f"Error in SVR demo: {e}") + return jsonify({'error': str(e)}), 500 + + +def clean_text(text): + return text.lower().strip() + + + + # Gradient-desent route +@app.route('/gradient-descent') +def gradient_descent(): + return render_template('Gradient-Descen.html') + +# Gradient-boosting route +@app.route('/gradient-boosting') +def gradient_boosting(): + return render_template('Gradient-Boosting.html') + +# Gradient-xgboost route +@app.route('/xgboost-regression') +def xgboost_regression(): + return render_template('XGBoost-Regression.html') + +#Gradient-lightgbm route +@app.route('/lightgbm') +def lightgbm(): + return render_template('LightGBM-Regression.html') + +#nerual network route for calssifcation +@app.route('/neural-network-classification') +def neural_network_classification(): + return render_template('Neural-Networks-for-Classification.html') + +#hierarchical clustering route + +@app.route('/hierarchical-clustering') +def hierarchical_clustering(): + return render_template('Hierarchical-Clustering.html') + +#Gaussian-mixture-models route +@app.route('/gaussian-mixture-models') +def gaussian_mixture_models(): + return render_template('Gaussian-Mixture-Models.html') + +#Principal-Component-Analysis +@app.route('/pca') +def pca(): + return render_template('Principal-Component-Analysis.html') + +#t-sne 
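+# --- Illustrative usage sketch (kept commented out) ---
+# The /run_svr_demo endpoint above accepts either JSON (predefined datasets) or
+# FormData (custom CSV uploads). Below is a minimal, hypothetical client call using
+# only the parameter names the route already reads (dataset_type, kernel, C, gamma,
+# epsilon); the local host and port are assumptions based on app.run() at the bottom.
+#
+# import requests
+# resp = requests.post(
+#     "http://127.0.0.1:7860/run_svr_demo",
+#     json={"dataset_type": "non_linear", "kernel": "rbf", "C": 1.0, "gamma": 0.1, "epsilon": 0.1},
+# )
+# result = resp.json()
+# print(result["mse"], result["r2_score"], result["support_vectors_count"])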
+@app.route('/t-sne') +def tsne(): + return render_template('t-SNE.html') + +# liner-discriminant-analysis +@app.route('/lda') +def lda(): + return render_template('Linear-Discriminant-Analysis.html') + +# Independent-Component-Analysis +@app.route('/ica') +def ica(): + return render_template('Independent-Component-Analysis.html') + +#Apriori +@app.route('/apriori') +def apriori(): + return render_template('Apriori-Algorithm.html') + + +# Eclat Algorithm +@app.route('/eclat') +def eclat(): + return render_template('Eclat-Algorithm.html') + +#genrative models +@app.route('/generative-models') +def generative_models(): + return render_template('Generative-Models.html') + +#self training +@app.route('/self-training') +def self_training(): + return render_template('Self-Training.html') + + +# TRANSDUCTIVE SVM +@app.route('/transductive-svm') +def transductive_svm(): + return render_template('Transductive-SVM.html') + + +#Graph-Based Methods +@app.route('/graph-based-methods') +def graph_based_methods(): + return render_template('Graph-Based-Method.html') + +#Agent-Environment-State +@app.route('/agent-environment-state') +def agent_environment_state(): + return render_template('Agent-Environment-State.html') + +#Action and Policy +@app.route('/action-and-policy') +def action_and_policy(): + return render_template('Action-and-Policy.html') + +#Reward-ValueFunction +@app.route('/reward-valuefunction') +def reward_valuefunction(): + return render_template('Reward-ValueFunction.html') + +#Q-Learning +@app.route('/q-learning') +def q_learning(): + return render_template('Q-Learning.html') + +#Deep Reinforcement Learning +@app.route('/deep-reinforcement-learning') +def deep_reinforcement_learning(): + return render_template('Deep-Reinforcement-Learning.html') + + +#Bagging +@app.route('/bagging') +def bagging(): + return render_template('Bagging.html') + +#Boosting +@app.route('/boosting') +def boosting(): + return render_template('Boosting.html') + +# stacking +@app.route('/stacking') +def stacking(): + return render_template('Stacking.html') + +# voting +@app.route('/voting') +def voting(): + return render_template('Voting.html') + +import re + +# Load saved model and vectorizer +# model = joblib.load("Models/logistic_model.pkl") +# vectorizer = joblib.load("Models/logvectorizer.pkl") + + +# Text cleaning +def clean_text(text): + text = text.lower() + text = re.sub(r'\W', ' ', text) + text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text) + text = re.sub(r'\s+', ' ', text) + return text.strip() + +@app.route('/logistic', methods=['GET', 'POST']) +def logistic(): + prediction, confidence_percentage, cleaned, tokens, probability = None, None, None, None, None + + + # model = load_file("Models/logistic_model.pkl") + # vectorizer = load_file("Models/logvectorizer.pkl") + model = load_file("logistic_model.pkl") + vectorizer = load_file("logvectorizer.pkl") + + if request.method == "POST": + msg = request.form.get('message', '') + cleaned = clean_text(msg) + tokens = cleaned.split() + + + try: + vector = vectorizer.transform([cleaned]) + probability = model.predict_proba(vector)[0][1] + prediction = "Spam" if probability >= 0.5 else "Not Spam" + confidence_percentage = round(probability * 100, 2) + except Exception as e: + print("Error predicting:", e) + prediction = "Error" + confidence_percentage = 0 + + return render_template( + "logistic.html", + prediction=prediction, + confidence_percentage=confidence_percentage, + cleaned=cleaned, + tokens=tokens, + probability=round(probability, 4) if probability else 
None, + source="sms" + ) + +@app.route('/logistic-sms', methods=['POST']) +def logistic_sms(): + try: + data = request.get_json() + msg = data.get('message', '') + cleaned = clean_text(msg) + tokens = cleaned.split() + + vector = vectorizer.transform([cleaned]) + probability = model.predict_proba(vector)[0][1] + prediction = "Spam" if probability >= 0.5 else "Not Spam" + confidence_percentage = round(probability * 100, 2) + + return jsonify({ + "prediction": prediction, + "confidence": confidence_percentage, + "probability": round(probability, 4), + "cleaned": cleaned, + "tokens": tokens, + "source": "json" + }) + + except Exception as e: + print("Error in /logistic-sms:", e) + return jsonify({"error": "Internal server error", "details": str(e)}), 500 + + + +# @app.route("/logistic", methods=["GET", "POST"]) +# def logistic(): +# prediction = None +# error = None +# if request.method == "POST": +# try: +# input_text = request.form.get("message") + +# # Load the vectorizer and logistic model from Models folder +# vectorizer = joblib.load("Models/vectorizer.pkl") +# model = joblib.load("Models/logistic_model.pkl") + +# # Transform input and make prediction +# input_vector = vectorizer.transform([input_text]) +# result = model.predict(input_vector)[0] + +# prediction = "✅ Not Spam" if result == 0 else "🚨 Spam" +# except Exception as e: +# error = str(e) + +# return render_template("logistic.html", prediction=prediction, error=error) + + + + + + +@app.route("/knn") +def knn_visual(): + return render_template("knn.html") + +@app.route('/knn_visual_predict', methods=['POST']) +def knn_visual_predict(): + data = request.get_json() + points = np.array(data['points']) # shape: (N, 3) + test_point = np.array(data['test_point']) # shape: (2,) + k = int(data['k']) + + X = points[:, :2] + y = points[:, 2].astype(int) + + knn = KNeighborsClassifier(n_neighbors=k) + knn.fit(X, y) + pred = knn.predict([test_point])[0] + + dists = np.linalg.norm(X - test_point, axis=1) + neighbor_indices = np.argsort(dists)[:k] + neighbors = X[neighbor_indices] + + return jsonify({ + 'prediction': int(pred), + 'neighbors': neighbors.tolist() + }) + +# 🔷 Route 2: KNN Digit Image Classifier (Upload-based) +@app.route("/knn_image") +def knn_image_page(): + return render_template("knn_image.html") + +from PIL import Image + +@app.route("/predict_image", methods=["POST"]) +def predict_image(): + if "image" not in request.files: + return jsonify({"error": "No image uploaded"}), 400 + + file = request.files["image"] + + try: + # ✅ Use PIL to open image from file bytes + image = Image.open(file.stream).convert("RGB") + image = image.resize((32, 32)) # Resize to match training size + img_array = np.array(image).flatten().reshape(1, -1) + except Exception as e: + return jsonify({"error": f"Invalid image. 
{str(e)}"}), 400 + + # Load model & labels + + # model = load_file("Models/knn_model.pkl") + # label_classes = load_file("Models/lasso_model.pkl") + # model = joblib.load("Models/knn_model.pkl") + # label_classes = np.load("Models/label_classes.npy") + model = load_file("knn_model.pkl") + label_classes = load_file("label_classes.npy") + + # Predict class and get probabilities + probs = model.predict_proba(img_array)[0] + pred_index = np.argmax(probs) + pred_label = label_classes[pred_index] + confidence = round(float(probs[pred_index]) * 100, 2) + + return jsonify({ + "prediction": str(pred_label), + "confidence": f"{confidence}%", + "all_probabilities": { + str(label_classes[i]): round(float(probs[i]) * 100, 2) + for i in range(len(probs)) + } + }) + +@app.route("/rfc") +def random_forest_page(): + return render_template("Random_Forest_Classifier.html") # Your beautiful HTML goes in rfc.html + +@app.route('/rf_visual_predict', methods=['POST']) +def rf_visual_predict(): + try: + data = request.get_json() + print("📦 Incoming JSON data:", data) + + labeled_points = data.get('points') + test_point = data.get('test_point') + + if not labeled_points or not test_point: + return jsonify({"error": "Missing points or test_point"}), 400 + + df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) + X = df[['X1', 'X2']] + y = df['Class'] + + rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42) + rf_model.fit(X, y) + + test_point_np = np.array(test_point).reshape(1, -1) + prediction = int(rf_model.predict(test_point_np)[0]) + + x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 + y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 + xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), + np.linspace(y_min, y_max, 100)) + + Z = rf_model.predict(np.c_[xx.ravel(), yy.ravel()]) + Z = Z.reshape(xx.shape) + + return jsonify({ + 'prediction': prediction, + 'decision_boundary_z': Z.tolist(), + 'decision_boundary_x_coords': xx[0, :].tolist(), + 'decision_boundary_y_coords': yy[:, 0].tolist() + }) + + except Exception as e: + import traceback + print("❌ Exception in /rf_visual_predict:") + traceback.print_exc() # Print full error stack trace + return jsonify({"error": str(e)}), 500 + +@app.route("/liar") +def liar_input_page(): + return render_template("rfc_liar_predict.html") + + + + + + + +@app.route("/ref/liar/predictor", methods=["POST"]) +def liar_predictor(): + try: + data = request.get_json() + statement = data.get("statement", "") + + if not statement: + return jsonify({"success": False, "error": "Missing statement"}), 400 + + try: + # 🔍 LIAR Model Prediction + features = vectorizer.transform([statement]) + prediction = model.predict(features)[0] + + liar_label_map = { + 0: "It can be false 🔥", + 1: "False ❌", + 2: "Mostly false but can be true 🤏", + 3: "Half True 🌓", + 4: "Mostly True 👍", + 5: "True ✅" + } + + prediction_label = liar_label_map.get(int(prediction), "Unknown") + + except ValueError as ve: + if "features" in str(ve): + # Fallback to Gemini API + prediction_label = ask_gemini(statement) + else: + raise ve + + # 🧠 BERT-Based Scientific Check + bert_result = bert_checker(statement)[0] + bert_label = bert_result["label"] + bert_score = round(bert_result["score"] * 100, 2) + + science_label_map = { + "LABEL_0": "✅ Scientifically Possible", + "LABEL_1": "❌ Scientifically Impossible" + } + + scientific_check = f"{science_label_map.get(bert_label, bert_label)} ({bert_score:.2f}%)" + + return jsonify({ + "success": True, + "prediction": prediction_label, + 
"reason": "Predicted from linguistic and content-based patterns, or Gemini fallback.", + "scientific_check": scientific_check + }) + + except Exception as e: + traceback.print_exc() + return jsonify({"success": False, "error": str(e)}), 500 + + + +#svm +@app.route("/svm") +def svm_page(): + return render_template("svm.html") + +@app.route('/svm_visual_predict', methods=['POST']) +def svm_visual_predict(): + data = request.json + labeled_points = data['points'] + test_point = data['test_point'] + svm_type = data['svm_type'] + c_param = float(data['c_param']) + gamma_param = float(data['gamma_param']) # Will be ignored for linear kernel + + df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) + X = df[['X1', 'X2']] + y = df['Class'] + + # 1. Train the SVM Classifier + if svm_type == 'linear': + svm_model = svm.SVC(kernel='linear', C=c_param, random_state=42) + elif svm_type == 'rbf': + svm_model = svm.SVC(kernel='rbf', C=c_param, gamma=gamma_param, random_state=42) + else: + return jsonify({'error': 'Invalid SVM type'}), 400 + + svm_model.fit(X, y) + + # 2. Predict for the test point + test_point_np = np.array(test_point).reshape(1, -1) + prediction = int(svm_model.predict(test_point_np)[0]) + + # 3. Get Support Vectors + # support_vectors_ refers to indices of support vectors + # svc_model.support_vectors_ gives the actual support vectors + support_vectors = svm_model.support_vectors_.tolist() + + # 4. Generate data for the decision boundary + # Create a meshgrid of points to predict across the entire plot area + x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 + y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 + + # Extend range slightly to ensure test point is within boundary if it's an outlier + x_min = min(x_min, test_point_np[0,0] - 1) + x_max = max(x_max, test_point_np[0,0] + 1) + y_min = min(y_min, test_point_np[0,1] - 1) + y_max = max(y_max, test_point_np[0,1] + 1) + + xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), + np.linspace(y_min, y_max, 100)) + + # Predict class for each point in the meshgrid + Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()]) + Z = Z.reshape(xx.shape) + + # Convert numpy arrays to lists for JSON serialization + decision_boundary_z = Z.tolist() + decision_boundary_x_coords = xx[0, :].tolist() + decision_boundary_y_coords = yy[:, 0].tolist() + + return jsonify({ + 'prediction': prediction, + 'decision_boundary_z': decision_boundary_z, + 'decision_boundary_x_coords': decision_boundary_x_coords, + 'decision_boundary_y_coords': decision_boundary_y_coords, + 'support_vectors': support_vectors + }) + + + + + + + +@app.route('/api/explain', methods=['POST']) +def explain(): + # In a real deployed environment, you'd secure your API key. + # For Canvas, it's automatically injected if GEMINI_API_KEY is empty string. + # If running locally and not in Canvas, set GEMINI_API_KEY in your environment variables. 
+    if not GEMINI_API_KEY and os.getenv("FLASK_ENV") != "development":  # Allow empty key in dev for local testing
+        return jsonify({'error': 'Missing API key'}), 500
+
+    payload = request.get_json()
+
+    try:
+        response = requests.post(
+            f"{GEMINI_URL}?key={GEMINI_API_KEY}",
+            headers={"Content-Type": "application/json"},
+            json=payload
+        )
+        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+        return jsonify(response.json())
+    except requests.exceptions.RequestException as e:
+        app.logger.error(f"Error calling Gemini API: {e}")  # Log the error on the server side
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/decision_tree')
+def decision_tree_page():
+    # This route serves your Decision Tree visualization page
+    # Ensure the HTML file name matches (e.g., 'decision_tree_viz.html' or 'decision_tree.html')
+    return render_template('decision_tree.html')  # Check your actual HTML file name here
+
+
+# Interactive game page; the view function name must differ from the
+# decision_tree_game view registered for /decision-tree-game above,
+# otherwise Flask rejects the duplicate endpoint at startup.
+@app.route('/game')
+def decision_tree_game_page():
+    """Renders the interactive game page for decision trees."""
+    return render_template('decision_tree_game.html')
+
+@app.route('/dt_visual_predict', methods=['POST'])
+def dt_visual_predict():
+    try:
+        data = request.json
+        labeled_points = data['points']
+        test_point = data['test_point']
+        max_depth = int(data['max_depth'])
+
+        # Convert labeled_points to a pandas DataFrame
+        df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class'])
+        X = df[['X1', 'X2']]
+        y = df['Class']
+
+        # Check if there's enough data to train
+        if X.empty or len(X) < 2:
+            return jsonify({'error': 'Not enough data points to train the model.'}), 400
+
+        # 1. Train the Decision Tree Classifier (This is the "model" part)
+        dt_model = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
+        dt_model.fit(X, y)
+
+        # 2. Predict for the test point
+        test_point_np = np.array(test_point).reshape(1, -1)
+        prediction = int(dt_model.predict(test_point_np)[0])
+
+        # 3.
Generate data for the decision boundary + x_min, x_max = X['X1'].min(), X['X1'].max() + y_min, y_max = X['X2'].min(), X['X2'].max() + + # Add a buffer to the plot range to make sure points are not on the edge + # And handle cases where min == max (e.g., all points have same X1 value) + x_buffer = 1.0 if (x_max - x_min) == 0 else (x_max - x_min) * 0.1 + y_buffer = 1.0 if (y_max - y_min) == 0 else (y_max - y_min) * 0.1 + + x_min -= x_buffer + x_max += x_buffer + y_min -= y_buffer + y_max += y_buffer + + # Ensure test point is also comfortably within the range + x_min = min(x_min, test_point_np[0,0] - 0.5) + x_max = max(x_max, test_point_np[0,0] + 0.5) + y_min = min(y_min, test_point_np[0,1] - 0.5) + y_max = max(y_max, test_point_np[0,1] + 0.5) + + # Create a meshgrid for plotting the decision boundary + xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), + np.linspace(y_min, y_max, 100)) + + # Predict class for each point in the meshgrid using the trained model + Z = dt_model.predict(np.c_[xx.ravel(), yy.ravel()]) + Z = Z.reshape(xx.shape) + + # Convert numpy arrays to lists for JSON serialization + decision_boundary_z = Z.tolist() + decision_boundary_x_coords = xx[0, :].tolist() + decision_boundary_y_coords = yy[:, 0].tolist() + + return jsonify({ + 'prediction': prediction, + 'decision_boundary_z': decision_boundary_z, + 'decision_boundary_x_coords': decision_boundary_x_coords, + 'decision_boundary_y_coords': decision_boundary_y_coords + }) + except Exception as e: + # This will print the actual error to your terminal + print(f"An error occurred in /dt_visual_predict: {e}") + # Return a more informative error message to the frontend + return jsonify({'error': f'Backend Error: {str(e)}. Check server console for details.'}), 500 + + # --- Naive Bayes Routes --- + +from urllib.parse import urlparse +from sklearn.naive_bayes import GaussianNB +from nltk.corpus import words + +nb_model = load_file("nb_url_model.pkl") +vectorizer = load_file("nb_url_vectorizer.pkl") + +# if nb_model is not None and vectorizer is not None: +# print("✅ Loaded Naive Bayes URL model") +# else: +# nb_model, vectorizer = None, None +# print("❌ vectorizer not found") + + + +@app.route('/nb_spam') +def nb_spam_page(): + return render_template('NB_spam.html') + + +import re +from urllib.parse import urlparse +from spellchecker import SpellChecker +import wordninja + + + +# ---- Whitelist (your full one, unchanged) ---- +whitelist = set([ + # Search Engines + 'google', 'bing', 'yahoo', 'duckduckgo', 'baidu', 'ask', + + # Social Media + 'facebook', 'instagram', 'twitter', 'linkedin', 'snapchat', 'tiktok', + 'threads', 'pinterest', 'reddit', 'quora', + + # Communication Tools + 'whatsapp', 'telegram', 'skype', 'zoom', 'meet', 'discord', + 'teams', 'signal', 'messenger', + + # Global E-commerce + 'amazon', 'ebay', 'shopify', 'alibaba', 'walmart', 'target', + 'etsy', 'shein', 'bestbuy', 'costco', 'newegg', + + # Indian E-commerce / Services + 'flipkart', 'myntra', 'ajio', 'nykaa', 'meesho', 'snapdeal', + 'paytm', 'phonepe', 'mobikwik', 'zomato', 'swiggy', 'ola', 'uber', 'bookmyshow', + 'ixigo', 'makemytrip', 'yatra', 'redbus', 'bigbasket', 'grofers', 'blinkit', + 'universalcollegeofengineering', + + # Education / Productivity + 'youtube', 'docs', 'drive', 'calendar', 'photos', 'gmail', 'notion', + 'edx', 'coursera', 'udemy', 'khanacademy', 'byjus', 'unacademy', + + # News / Media / Tech + 'bbc', 'cnn', 'nyt', 'forbes', 'bloomberg', 'reuters', + 'ndtv', 'indiatimes', 'thehindu', 'hindustantimes', 'indiatoday', + 'techcrunch', 
'verge', 'wired', + + # Streaming / Entertainment + 'netflix', 'hotstar', 'primevideo', 'spotify', 'gaana', 'wynk', 'saavn', 'voot', + + # Dev & Tools + 'github', 'stackoverflow', 'medium', 'gitlab', 'bitbucket', + 'adobe', 'figma', 'canva', + + # Financial / Banking + 'hdfcbank', 'icicibank', 'sbi', 'axisbank', 'kotak', 'boi', 'upi', + 'visa', 'mastercard', 'paypal', 'stripe', 'razorpay', 'phonepe', 'paytm', + + # Government / Utilities + 'gov', 'nic', 'irctc', 'uidai', 'mygov', 'incometax', 'aadhar', 'rbi', + + # Others Common + 'airtel', 'jio', 'bsnl', 'vi', 'speedtest', 'cricbuzz', 'espn', 'espncricinfo', + 'wikipedia', 'mozilla', 'opera', 'chrome', 'android', 'apple', 'windows', 'microsoft' +]) + + # ... your full whitelist from before ... + + +# ---- Trusted & Bad TLDs ---- +trusted_tlds = [ + '.gov', '.nic.in', '.edu', '.ac.in', '.mil', '.org', '.int', + '.co.in', '.gov.in', '.res.in', '.net.in', '.nic.gov.in' +] + +# Expanded Bad TLDs (Rule 4) +bad_tlds = [ + '.xyz', '.tk', '.ml', '.ga', '.cf', '.top', '.gq', '.cn', + '.ru', '.pw', '.bid', '.link', '.loan', '.party', '.science', + '.stream', '.webcam', '.online', '.site', '.website', '.space', + '.club', '.buzz', '.info' +] + +# Suspicious extensions (Rule 13) +suspicious_extensions = ['.exe', '.zip', '.rar', '.js', '.php', '.asp', '.aspx', '.jsp', '.sh'] + +# Phishing keywords (Rule 11, your full list) +phishing_keywords = [ + 'login', 'verify', 'secure', 'account', 'update', 'confirm', 'authenticate', + 'free', 'bonus', 'offer', 'prize', 'winner', 'gift', 'coupon', 'discount', + 'bank', 'paypal', 'creditcard', 'mastercard', 'visa', 'amex', 'westernunion', + 'signin', 'click', 'password', 'unlock', 'recover', 'validate', 'urgency', + 'limitedtime', 'expires', 'suspicious', 'alert', 'important', 'actionrequired' +] + +# ---- Rules 5–14 ---- +rules = { + 5: r"https?://\d{1,3}(\.\d{1,3}){3}", + 6: r"@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", + 7: r"(free money|win now|click here)", + 8: r"https?://[^\s]*\.(ru|cn|tk)", + 9: r"https?://.{0,6}\..{2,6}/.{0,6}", + 10: r"[0-9]{10,}", + 12: r"https?://[^\s]*@[^\s]+", + 13: r"https?://[^\s]*//[^\s]+", + 14: r"https?://[^\s]*\?(?:[^=]+=[^&]*&){5,}", +} + + +# ---- Gibberish Check Helper (Rule 15) ---- +def is_gibberish_word(word): + vowels = "aeiou" + v_count = sum(c in vowels for c in word) + return v_count / len(word) < 0.25 + +# # ---- Utility: Extract words from URL ---- +# def extract_words(url): +# parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) +# raw = parsed.netloc.replace('-', '') + parsed.path.replace('-', '') +# # Split using wordninja +# words = wordninja.split(raw.lower()) +# # Keep only alphabetic words of length >= 3 +# words = [w for w in words if w.isalpha() and len(w) >= 3] +# return words +# ---- Extract words from URL ---- +def extract_words(url): + parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) + parts = re.split(r'\W+', parsed.netloc + parsed.path) + final_words = [] + for word in parts: + if len(word) > 2 and word.isalpha(): + split_words = wordninja.split(word.lower()) + if len(split_words) <= 1: + split_words = [word.lower()] + final_words.extend(split_words) + return final_words + + +# --- Your original predict function, now inside the Flask app --- +@app.route("/predict", methods=["POST"]) +def predict(): + try: + data = request.get_json() + url = data.get("url", "").lower() + if not url: + return jsonify({'error': 'No URL provided'}), 400 + + parsed = urlparse(url if url.startswith(("http://", 
"https://")) else "http://" + url) + path = parsed.path + + # ---- SpellChecker using built-in dictionary ---- + spell = SpellChecker(distance=1) + + # ---- Extract words and check spelling ---- + words = extract_words(url) + # ignore known TLDs + tlds_to_ignore = [tld.replace('.', '',"/") for tld in trusted_tlds + bad_tlds] + words_for_spellcheck = [w for w in words if w not in tlds_to_ignore] + + misspelled = spell.unknown(words_for_spellcheck) + steps = [{"word": w, "valid": (w not in misspelled) or (w in tlds_to_ignore)} for w in words] + + if misspelled: + return jsonify({ + "prediction": 1, + "reason": f"🧾 Spelling errors: {', '.join(misspelled)}", + "steps": steps + }) + else: + return jsonify({ + "prediction": 0, + "reason": "✅ No spelling issues", + "steps": steps + }) + + except Exception as e: + return jsonify({'error': f"An issue occurred during spell checking: {str(e)}"}), 500 + + + + +@app.route('/naive_bayes') +def naive_bayes_page(): + return render_template('naive_bayes_viz.html') + + # --- New Naive Bayes Prediction Route --- +@app.route('/nb_visual_predict', methods=['POST']) +def nb_visual_predict(): + try: + data = request.json + labeled_points = data['points'] + test_point = data['test_point'] + + df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) + X = df[['X1', 'X2']] + y = df['Class'] + + # Ensure enough data and at least two classes for classification + if X.empty or len(X) < 2: + return jsonify({'error': 'Not enough data points to train the model.'}), 400 + if len(y.unique()) < 2: + return jsonify({'error': 'Need at least two different classes to classify.'}), 400 + + # Train Gaussian Naive Bayes Model + # GaussianNB is suitable for continuous data + nb_model = GaussianNB() + nb_model.fit(X, y) + + # Predict for the test point + test_point_np = np.array(test_point).reshape(1, -1) + prediction = int(nb_model.predict(test_point_np)[0]) + + # Generate data for the decision boundary + x_min, x_max = X['X1'].min(), X['X1'].max() + y_min, y_max = X['X2'].min(), X['X2'].max() + + x_buffer = 1.0 if x_max - x_min == 0 else (x_max - x_min) * 0.1 + y_buffer = 1.0 if y_max - y_min == 0 else (y_max - y_min) * 0.1 + + x_min -= x_buffer + x_max += x_buffer + y_min -= y_buffer + y_max += y_buffer + + x_min = min(x_min, test_point_np[0,0] - 0.5) + x_max = max(x_max, test_point_np[0,0] + 0.5) + y_min = min(y_min, test_point_np[0,1] - 0.5) + y_max = max(y_max, test_point_np[0,1] + 0.5) + + xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), + np.linspace(y_min, y_max, 100)) + + if xx.size == 0 or yy.size == 0: + return jsonify({'error': 'Meshgrid could not be created. Data range too narrow.'}), 400 + + # Predict class for each point in the meshgrid + # Use predict_proba and then argmax to get class for decision boundary coloring + Z = nb_model.predict(np.c_[xx.ravel(), yy.ravel()]) + Z = Z.reshape(xx.shape) + + decision_boundary_z = Z.tolist() + decision_boundary_x_coords = xx[0, :].tolist() + decision_boundary_y_coords = yy[:, 0].tolist() + + return jsonify({ + 'prediction': prediction, + 'decision_boundary_z': decision_boundary_z, + 'decision_boundary_x_coords': decision_boundary_x_coords, + 'decision_boundary_y_coords': decision_boundary_y_coords + }) + except Exception as e: + print(f"An error occurred in /nb_visual_predict: {e}") + return jsonify({'error': f'Backend Error: {str(e)}. 
Check server console for details.'}), 500 + +def check_with_virustotal(url): + try: + headers = {"x-apikey": VT_API_KEY} + submit_url = "https://www.virustotal.com/api/v3/urls" + + # Submit the URL for scanning + response = requests.post(submit_url, headers=headers, data={"url": url}) + url_id = response.json()["data"]["id"] + + # Fetch result + result = requests.get(f"{submit_url}/{url_id}", headers=headers) + data = result.json() + + stats = data["data"]["attributes"]["last_analysis_stats"] + malicious_count = stats.get("malicious", 0) + + if malicious_count > 0: + return True, f"☣️ VirusTotal flagged it as malicious ({malicious_count} engines)" + return False, None + except Exception as e: + print(f"⚠️ VirusTotal error: {e}") + + + + return False, None + + + + + + + + + + +@app.route('/kmeans-clustering') +def clustering(): + return render_template('clustering.html') + +#image code +@app.route('/kmeans-Dbscan-image', methods=['GET', 'POST']) +def compress_and_clean(): + final_image = None + + if request.method == 'POST': + try: + # Get form values + mode = request.form.get('mode', 'compress') + k = int(request.form.get('k', 8)) + eps = float(request.form.get('eps', 0.6)) + min_samples = int(request.form.get('min_samples', 50)) + image_file = request.files.get('image') + + if image_file and image_file.filename != '': + # Load image + img = Image.open(image_file).convert('RGB') + max_size = (518, 518) + img.thumbnail(max_size, Image.Resampling.LANCZOS) + + img_np = np.array(img) + h, w, d = img_np.shape + pixels = img_np.reshape(-1, d) + + # Apply KMeans + kmeans = KMeans(n_clusters=k, random_state=42, n_init=10) + kmeans.fit(pixels) + clustered_pixels = kmeans.cluster_centers_[kmeans.labels_].astype(np.uint8) + + # Mode 1: Just Compress + if mode == 'compress': + final_pixels = clustered_pixels.reshape(h, w, d) + + # Mode 2: Compress + Clean (KMeans + DBSCAN) + else: + # Sample to avoid MemoryError + max_dbscan_pixels = 10000 + if len(clustered_pixels) > max_dbscan_pixels: + idx = np.random.choice(len(clustered_pixels), max_dbscan_pixels, replace=False) + dbscan_input = clustered_pixels[idx] + else: + dbscan_input = clustered_pixels + + # DBSCAN + # For DBSCAN: use only 10,000 pixels max + max_dbscan_pixels = 10000 + + scaler = StandardScaler() + pixels_scaled = scaler.fit_transform(dbscan_input) + db = DBSCAN(eps=eps, min_samples=min_samples) + labels = db.fit_predict(pixels_scaled) + + # Clean noisy pixels + clean_pixels = [] + for i in range(len(dbscan_input)): + label = labels[i] + clean_pixels.append([0, 0, 0] if label == -1 else dbscan_input[i]) + + # Fill extra if sampling was used + if len(clustered_pixels) > max_dbscan_pixels: + clean_pixels.extend([[0, 0, 0]] * (len(clustered_pixels) - len(clean_pixels))) + + final_pixels = np.array(clean_pixels, dtype=np.uint8).reshape(h, w, d) + + # Save final image + final_img = Image.fromarray(final_pixels) + final_image = 'compressed_clean.jpg' + final_img.save(os.path.join(app.config['UPLOAD_FOLDER'], final_image), optimize=True, quality=90) + + except Exception as e: + return f"⚠️ Error: {str(e)}", 500 + + return render_template('kmean-dbscan-image.html', final_image=final_image) + +@app.route('/DBscan') +def DBSCAN(): + return render_template('DBSCAN.html') + + +#test routs start here + + +@app.route('/Test-layout') +def test(): + return render_template('Test-layout.html') + +@app.route('/Test-home') +def Test_home(): + return render_template('Test-home.html',active_page='Test-home') + +@app.route('/Test-supervise') +def 
Test_supervise():
+    return render_template('Test/Test-supervise.html', active_page='Test-supervise')
+
+
+@app.route('/Test-unsupervised')
+def Test_unsupervised():
+    return render_template('Test/Test-unsupervised.html', active_page='Test-unsupervised')
+
+# Semi-Supervised Learning page
+@app.route('/Test-semi-supervised')
+def Test_semi_supervised():
+    return render_template('Test/Test-semi_supervised.html', active_page='Test-semi_supervised')
+
+# Reinforcement Learning page
+@app.route('/Test-reinforcement')
+def Test_reinforcement():
+    return render_template('Test/Test-reinforcement.html', active_page='Test-reinforcement')
+
+# Ensemble Learning page
+@app.route('/Test-ensemble')
+def Test_ensemble():
+    return render_template('Test/Test-ensemble.html', active_page='Test-ensemble')
+
+# Templates/Test/Quiz-Overview-Page.html
+@app.route('/linear-Quiz-Overview-Page')
+def linear_Test_quiz_overview():
+    return render_template('Test/linear-Quiz-Overview-Page.html', active_page='linear-Quiz-Overview-Page')
+
+
+@app.route('/Quiz-test')
+def Quiz_test():
+    return render_template('Test/Quiz-test.html', active_page='Quiz-test')
+# If the data file doesn't show or display, serve it with render_template like this: render_template('data/yourfile.json')
+
+# @app.route('/Quiz-test/<topic>')
+# def quiz_topic(topic):
+#     import json, os
+#     count = int(request.args.get('count', 10))
+#     try:
+#         json_path = os.path.join(app.root_path, 'data', f'{topic}.json')
+#         with open(json_path, 'r', encoding='utf-8') as f:
+#             data = json.load(f)  # This is your JSON array
+
+#         # Transform the JSON to match frontend expectations
+#         transformed = []
+#         for q in data[:count]:
+#             transformed.append({
+#                 "id": q.get("id"),
+#                 "question": q.get("questionText"),
+#                 "options": q.get("options"),
+#                 "answer": q.get("options")[q.get("correctAnswerIndex")],
+#                 "explanation": q.get("explanation")
+#             })
+
+#         return jsonify(transformed)
+
+#     except FileNotFoundError:
+#         return "Topic not found", 404
+#     except json.JSONDecodeError:
+#         # return "Invalid JSON file", 500
+
+# @app.route('/Quiz-test/<topic>')
+# def quiz_topic(topic):
+#     import os, json
+#     count = int(request.args.get('count', 10))
+#     json_path = os.path.join(app.root_path, 'data', f'{topic}.json')
+
+#     try:
+#         with open(json_path, 'r', encoding='utf-8') as f:
+#             data = json.load(f)
+
+#         # If JSON is a dict with "questions" key
+#         if isinstance(data, dict) and "questions" in data:
+#             questions = data["questions"][:count]
+#         elif isinstance(data, list):
+#             questions = data[:count]
+#         else:
+#             return "Invalid JSON structure", 400
+
+#         return jsonify(questions)
+#     except FileNotFoundError:
+#         return "Topic not found", 404
+#     except json.JSONDecodeError:
+#         return "Invalid JSON file", 400
+
+# ✅ API Route: Send JSON quiz data
+@app.route('/api/quiz/<topic>')
+def get_quiz(topic):
+    count = int(request.args.get('count', 10))
+    file_path = os.path.join('data', f'{topic}.json')
+
+    if not os.path.exists(file_path):
+        return jsonify({'error': 'Topic not found'}), 404
+
+    with open(file_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    questions = data.get('questions', [])[:count]
+    return jsonify({'questions': questions})
+
+
+@app.route('/polynomial-Quiz')
+def polynomial_Test_quiz():
+    return render_template('Test/polynomial-Quiz.html', active_page='polynomial-Quiz')
+
+# -------------------------------
+# Regression Algorithms
+# -------------------------------
+@app.route('/ridge-regression-test')
+def ridge_regression_test():
+    return render_template('Test/ridge-regression-test.html',
active_page='ridge-regression-test') + +@app.route('/lasso-regression-test') +def lasso_regression_test(): + return render_template('Test/lasso-regression-test.html', active_page='lasso-regression-test') + +@app.route('/svr-test') +def svr_test(): + return render_template('Test/svr-r-test.html', active_page='svr-r-test') + +@app.route('/decision-tree-regression-test') +def decision_tree_regression_test(): + return render_template('Test/decision-tree-regression-test.html', active_page='decision-tree-regression-test') + +@app.route('/random-forest-regression-test') +def random_forest_regression_test(): + return render_template('Test/random-forest-regression-test.html', active_page='random-forest-regression-test') + + +# ------------------------------- +# Classification Algorithms +# ------------------------------- +@app.route('/logistic-regression-test') +def logistic_regression_test(): + return render_template('Test/logistic-regression-test.html', active_page='logistic-regression-test') + +@app.route('/svm-c-test') +def svm_test(): + return render_template('Test/svm-c-test.html', active_page='svm-c-test') + +@app.route('/decision-trees-c-test') +def decision_trees_test(): + return render_template('Test/decision-trees-c-test.html', active_page='decision-trees-c-test') + +@app.route('/random-forest-c-test') +def random_forest_test(): + return render_template('Test/random-forest-c-test.html', active_page='random-forest-c-test') + +@app.route('/gradient-descent-test') +def gradient_descent_test(): + return render_template('Test/gradient-descent-test.html', active_page='gradient-descent-test') + +@app.route('/gradient-boosting-test') +def gradient_boosting_test(): + return render_template('Test/gradient-boosting-test.html', active_page='gradient-boosting-test') + +@app.route('/xgboost-regression-test') +def xgboost_regression_test(): + return render_template('Test/xgboost-regression-test.html', active_page='xgboost-regression-test') + +@app.route('/lightgbm-test') +def lightgbm_test(): + return render_template('Test/lightgbm-test.html', active_page='lightgbm-test') + +@app.route('/knn-test') +def knn_test(): + return render_template('Test/knn-test.html', active_page='knn-test') + +@app.route('/naive-bayes-test') +def naive_bayes_test(): + return render_template('Test/naive-bayes-test.html', active_page='naive-bayes-test') + +@app.route('/neural-networks-test') +def neural_networks_test(): + return render_template('Test/neural-networks-test.html', active_page='neural-networks-test') + + +# ------------------------------- +# Clustering +# ------------------------------- +@app.route('/k-means-test') +def k_means_test(): + return render_template('Test/k-means-test.html', active_page='k-means-test') + +@app.route('/hierarchical-clustering-test') +def hierarchical_clustering_test(): + return render_template('Test/hierarchical-clustering-test.html', active_page='hierarchical-clustering-test') + +@app.route('/dbscan-test') +def dbscan_test(): + return render_template('Test/dbscan-test.html', active_page='dbscan-test') + +@app.route('/gmm-test') +def gmm_test(): + return render_template('Test/gmm-test.html', active_page='gmm-test') + + +# ------------------------------- +# Dimensionality Reduction +# ------------------------------- +@app.route('/pca-test') +def pca_test(): + return render_template('Test/pca-test.html', active_page='pca-test') + +@app.route('/tsne-test') +def tsne_test(): + return render_template('Test/tsne-test.html', active_page='tsne-test') + +@app.route('/lda-test') +def lda_test(): + 
return render_template('Test/lda-test.html', active_page='lda-test') + +@app.route('/ica-test') +def ica_test(): + return render_template('Test/ica-test.html', active_page='ica-test') + + +# ------------------------------- +# Association Rule Learning +# ------------------------------- +@app.route('/apriori-test') +def apriori_test(): + return render_template('Test/apriori-test.html', active_page='apriori-test') + +@app.route('/eclat-test') +def eclat_test(): + return render_template('Test/eclat-test.html', active_page='eclat-test') + + +# ------------------------------- +# Semi-Supervised Learning +# ------------------------------- +@app.route('/generative-models-test') +def generative_models_test(): + return render_template('Test/generative-models-test.html', active_page='generative-models-test') + +@app.route('/self-training-test') +def self_training_test(): + return render_template('Test/self-training-test.html', active_page='self-training-test') + +@app.route('/transductive-svm-test') +def transductive_svm_test(): + return render_template('Test/transductive-svm-test.html', active_page='transductive-svm-test') + +@app.route('/graph-based-methods-test') +def graph_based_methods_test(): + return render_template('Test/graph-based-methods-test.html', active_page='graph-based-methods-test') + + +# ------------------------------- +# Reinforcement Learning +# ------------------------------- +@app.route('/agent-environment-state-test') +def agent_environment_state_test(): + return render_template('Test/agent-environment-state-test.html', active_page='agent-environment-state-test') + +@app.route('/action-policy-test') +def action_policy_test(): + return render_template('Test/action-policy-test.html', active_page='action-policy-test') + +@app.route('/reward-value-function-test') +def reward_value_function_test(): + return render_template('Test/reward-value-function-test.html', active_page='reward-value-function-test') + +@app.route('/q-learning-test') +def q_learning_test(): + return render_template('Test/q-learning-test.html', active_page='q-learning-test') + +@app.route('/deep-reinforcement-learning-test') +def deep_reinforcement_learning_test(): + return render_template('Test/deep-reinforcement-learning-test.html', active_page='deep-reinforcement-learning-test') + + +# ------------------------------- +# Ensemble Methods +# ------------------------------- +@app.route('/bagging-test') +def bagging_test(): + return render_template('Test/bagging-test.html', active_page='bagging-test') + +@app.route('/boosting-test') +def boosting_test(): + return render_template('Test/boosting-test.html', active_page='boosting-test') + +@app.route('/stacking-test') +def stacking_test(): + return render_template('Test/stacking-test.html', active_page='stacking-test') + +@app.route('/voting-test') +def voting_test(): + return render_template('Test/voting-test.html', active_page='voting-test') + + + + + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=7860) + + + + +
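+# --- Illustrative notes and sketches (kept commented out) ---
+# load_file() near the top of this file calls hf_hub_download() and torch.load(),
+# so the corresponding imports appear to be needed alongside the existing ones:
+#   from huggingface_hub import hf_hub_download
+#   import torch
+#
+# A minimal local smoke test using Flask's built-in test client; the quiz topic
+# name 'linear-regression' is a hypothetical example and must match a JSON file
+# under the data/ folder:
+# with app.test_client() as client:
+#     assert client.get('/').status_code == 200
+#     print(client.get('/api/quiz/linear-regression?count=2').get_json())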