"""Streamlit app that flags anomalous rows of an uploaded Excel sheet.

The numeric columns of the sheet are standardised and scored with an
Isolation Forest. Rows predicted as outliers are listed in a table and
highlighted in red on one line chart per numeric column.
"""

from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Share of rows the model is told to treat as anomalous.
CONTAMINATION = 0.15
# Fixed seed so the same upload always yields the same anomalies.
RANDOM_STATE = 42

st.title("Saif Check Anomalies")
st.write("Upload an Excel file to detect anomalies")

uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx", "xls"])

if uploaded_file is not None:
    # UI boundary: report an unreadable upload instead of showing a traceback.
    try:
        raw = pd.read_excel(uploaded_file)
    except Exception as exc:
        st.error(f"Could not read the Excel file: {exc}")
        st.stop()

    # Only numeric columns can be scaled and scored.
    numeric = raw.select_dtypes(include=[int, float])

    # The scaler and the forest both reject NaN/inf, so drop columns that
    # are entirely empty first, then any row that is still incomplete.
    numeric = numeric.replace([np.inf, -np.inf], np.nan)
    numeric = numeric.dropna(axis="columns", how="all")
    total_rows = len(numeric)
    numeric = numeric.dropna(axis="index", how="any")

    if numeric.empty:
        st.warning("The file contains no usable numeric data to analyse.")
        st.stop()

    skipped_rows = total_rows - len(numeric)
    if skipped_rows:
        st.info(f"Skipped {skipped_rows} row(s) with missing or infinite values.")

    scaled_data = StandardScaler().fit_transform(numeric)

    clf = IsolationForest(contamination=CONTAMINATION, random_state=RANDOM_STATE)
    predictions = clf.fit_predict(scaled_data)

    # IsolationForest labels outliers as -1 and inliers as 1.
    anomalies = numeric.loc[predictions == -1]

    st.subheader(f"Number of anomalies detected: {len(anomalies)}")

    st.subheader("Anomalies Detected")
    st.write(anomalies)

    for col in numeric.columns:
        fig, ax = plt.subplots()
        ax.plot(numeric.index, numeric[col], label="Data")
        # Plot anomalies against their index labels so the markers sit on
        # the line even when rows were dropped or the index is not 0..n-1.
        ax.scatter(anomalies.index, anomalies[col], color="red", label="Anomalies")
        ax.set_title(f"Anomalies in {col}")
        ax.legend()
        st.pyplot(fig)
        # pyplot keeps every figure alive until closed; release it so
        # repeated Streamlit reruns do not accumulate figures in memory.
        plt.close(fig)