akdiwahar committed
Commit 92045c0 • 1 Parent(s): 9d2c818

Upload 9 files
README.md CHANGED
@@ -1,13 +1,12 @@
  ---
- title: JupyterLab
- emoji: 💻🐳
- colorFrom: gray
+ title: Machine Failure Predictor
+ emoji: 📊
+ colorFrom: purple
  colorTo: green
- sdk: docker
+ sdk: gradio
+ sdk_version: 4.25.0
+ app_file: app.py
  pinned: false
- tags:
- - jupyterlab
- suggested_storage: small
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,91 @@
+ import os
+ import uuid
+ import joblib
+ import json
+
+ import gradio as gr
+ import pandas as pd
+
+ from huggingface_hub import CommitScheduler
+ from pathlib import Path
+
+ # Run the training script in the same directory
+
+ os.system("python train.py")
+
+ # Load the freshly trained model
+
+ machine_failure_predictor = joblib.load('model.joblib')
+
+ # Prepare the logging functionality
+
+ log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
+ log_folder = log_file.parent
+
+ scheduler = CommitScheduler(
+     repo_id="machine-failure-logs",
+     repo_type="dataset",
+     folder_path=log_folder,
+     path_in_repo="data",
+     every=2
+ )
+
+ # Set up UI components for input and output
+
+ air_temperature_input = gr.Number(label='Air temperature [K]')
+ process_temperature_input = gr.Number(label='Process temperature [K]')
+ rotational_speed_input = gr.Number(label='Rotational speed [rpm]')
+ torque_input = gr.Number(label='Torque [Nm]')
+ tool_wear_input = gr.Number(label='Tool wear [min]')
+ type_input = gr.Dropdown(
+     ['L', 'M', 'H'],
+     label='Type'
+ )
+
+ model_output = gr.Label(label="Machine failure")
+
+ # Define the predict function that runs when 'Submit' is clicked or when an API request is made
+ def predict_machine_failure(air_temperature, process_temperature, rotational_speed, torque, tool_wear, type):
+     sample = {
+         'Air temperature [K]': air_temperature,
+         'Process temperature [K]': process_temperature,
+         'Rotational speed [rpm]': rotational_speed,
+         'Torque [Nm]': torque,
+         'Tool wear [min]': tool_wear,
+         'Type': type
+     }
+     data_point = pd.DataFrame([sample])
+     prediction = machine_failure_predictor.predict(data_point).tolist()
+
+     with scheduler.lock:
+         with log_file.open("a") as f:
+             f.write(json.dumps(
+                 {
+                     'Air temperature [K]': air_temperature,
+                     'Process temperature [K]': process_temperature,
+                     'Rotational speed [rpm]': rotational_speed,
+                     'Torque [Nm]': torque,
+                     'Tool wear [min]': tool_wear,
+                     'Type': type,
+                     'prediction': prediction[0]
+                 }
+             ))
+             f.write("\n")
+
+     return prediction[0]
+
+ # Create the interface
+ demo = gr.Interface(
+     fn=predict_machine_failure,
+     inputs=[air_temperature_input, process_temperature_input, rotational_speed_input,
+             torque_input, tool_wear_input, type_input],
+     outputs=model_output,
+     title="Machine Failure Predictor",
+     description="This API allows you to predict the machine failure status of a piece of equipment",
+     allow_flagging="auto",
+     concurrency_limit=8
+ )
+
+ # Enable the request queue and launch
+ demo.queue()
+ demo.launch(share=False)
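Each replica of the Space writes its own newline-delimited JSON file under logs/, and the CommitScheduler pushes the folder to the data/ path of the dataset repo every two minutes. A minimal sketch of reading the accumulated logs back for monitoring; the full repo id below is hypothetical, since CommitScheduler creates the dataset under the authenticated user's namespace:

from pathlib import Path

import pandas as pd
from huggingface_hub import snapshot_download

# Download the log dataset locally; "your-username" is a placeholder
local_dir = snapshot_download(
    repo_id="your-username/machine-failure-logs",
    repo_type="dataset",
)

# Each log file is newline-delimited JSON written by predict_machine_failure
records = pd.concat(
    [pd.read_json(path, lines=True) for path in Path(local_dir).glob("data/*.json")],
    ignore_index=True,
)
print(records.tail())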
client_requirements.txt ADDED
@@ -0,0 +1,2 @@
+ scikit-learn==1.2.2
+ gradio-client==0.15.0
inference.py ADDED
@@ -0,0 +1,68 @@
+ """
+ Obtain Predictions for the Machine Failure Predictor Model using the Gradio Client
+ ===================================================================================
+
+ This script connects to a deployed machine failure predictor model using the
+ Gradio client, fetches the dataset, preprocesses the data, and generates
+ predictions for a sample of test data using the deployed model. The resulting
+ predictions are stored in a list. A one-second delay follows each prediction
+ request to avoid overloading the model server.
+ """
+
+ import time
+
+ from gradio_client import Client
+
+ from sklearn.datasets import fetch_openml
+ from sklearn.model_selection import train_test_split
+
+
+ client = Client("pgurazada1/machine-failure-predictor")
+
+ dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
+
+ data_df = dataset.data
+
+ target = 'Machine failure'
+ numeric_features = [
+     'Air temperature [K]',
+     'Process temperature [K]',
+     'Rotational speed [rpm]',
+     'Torque [Nm]',
+     'Tool wear [min]'
+ ]
+ categorical_features = ['Type']
+
+ X = data_df[numeric_features + categorical_features]
+ y = data_df[target]
+
+ Xtrain, Xtest, ytrain, ytest = train_test_split(
+     X, y,
+     test_size=0.2,
+     random_state=42
+ )
+
+ Xtest_sample = Xtest.sample(100)
+
+ Xtest_sample_rows = list(Xtest_sample.itertuples(index=False, name=None))
+
+ batch_predictions = []
+
+ for row in Xtest_sample_rows:
+     try:
+         job = client.submit(
+             air_temperature=row[0],
+             process_temperature=row[1],
+             rotational_speed=row[2],
+             torque=row[3],
+             tool_wear=row[4],
+             type=row[5],
+             api_name="/predict"
+         )
+
+         batch_predictions.append(job.result())
+
+         time.sleep(1)
+
+     except Exception as e:
+         print(e)
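For a one-off request, the client's blocking predict method is equivalent to submit followed by result, using the same keyword-argument convention the loop above relies on. A minimal sketch with illustrative input values (not taken from the dataset):

# Blocking equivalent of client.submit(...).result() for a single request;
# the input values here are for illustration only
prediction = client.predict(
    air_temperature=300.8,
    process_temperature=310.3,
    rotational_speed=1538,
    torque=36.1,
    tool_wear=198,
    type='L',
    api_name="/predict"
)
print(prediction)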
machine_failure_prediction.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
machine_failure_prediction_log_monitoring.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
model.joblib ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0db284be28e1303ab3612a3a6e35076ff8e9e32c035dd4e2ffdf9635b940780
+ size 3838
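The three lines above are a Git LFS pointer: the serialized pipeline itself lives in LFS storage, addressed by its SHA-256 digest. A sketch of fetching and loading it outside the Space, assuming the Space repo id that inference.py points at (your own deployment may differ):

import joblib
from huggingface_hub import hf_hub_download

# Resolve the LFS pointer to the actual model file and load it
model_path = hf_hub_download(
    repo_id="pgurazada1/machine-failure-predictor",
    repo_type="space",
    filename="model.joblib",
)
machine_failure_predictor = joblib.load(model_path)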
requirements.txt CHANGED
@@ -1,4 +1 @@
- jupyterlab==3.6.1
- jupyter-server==2.3.0
- tornado==6.2
- ipywidgets
+ scikit-learn==1.2.2
train.py ADDED
@@ -0,0 +1,76 @@
+
+ import joblib
+
+ from sklearn.datasets import fetch_openml
+
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
+ from sklearn.compose import make_column_transformer
+
+ from sklearn.pipeline import make_pipeline
+
+ from sklearn.model_selection import train_test_split, RandomizedSearchCV
+
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.metrics import accuracy_score, classification_report
+
+ dataset = fetch_openml(data_id=42890, as_frame=True, parser="auto")
+
+ data_df = dataset.data
+
+ target = 'Machine failure'
+ numeric_features = [
+     'Air temperature [K]',
+     'Process temperature [K]',
+     'Rotational speed [rpm]',
+     'Torque [Nm]',
+     'Tool wear [min]'
+ ]
+ categorical_features = ['Type']
+
+ print("Creating data subsets")
+
+ X = data_df[numeric_features + categorical_features]
+ y = data_df[target]
+
+ Xtrain, Xtest, ytrain, ytest = train_test_split(
+     X, y,
+     test_size=0.2,
+     random_state=42
+ )
+
+ preprocessor = make_column_transformer(
+     (StandardScaler(), numeric_features),
+     (OneHotEncoder(handle_unknown='ignore'), categorical_features)
+ )
+
+ model_logistic_regression = LogisticRegression(n_jobs=-1)
+
+ print("Estimating Best Model Pipeline")
+
+ model_pipeline = make_pipeline(
+     preprocessor,
+     model_logistic_regression
+ )
+
+ param_distribution = {
+     "logisticregression__C": [0.001, 0.01, 0.1, 0.5, 1, 5, 10]
+ }
+
+ rand_search_cv = RandomizedSearchCV(
+     model_pipeline,
+     param_distribution,
+     n_iter=3,
+     cv=3,
+     random_state=42
+ )
+
+ rand_search_cv.fit(Xtrain, ytrain)
+
+ print("Logging Metrics")
+ print(f"Mean CV accuracy: {rand_search_cv.best_score_}")
+
+ print("Serializing Model")
+
+ saved_model_path = "model.joblib"
+
+ joblib.dump(rand_search_cv.best_estimator_, saved_model_path)
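train.py imports accuracy_score and classification_report but only logs the cross-validation score. A sketch of held-out evaluation that could be appended after the search, reusing the script's own Xtest/ytest split; this is an addition, not part of the committed script:

# Evaluate the tuned pipeline on the 20% held-out split
ypred = rand_search_cv.best_estimator_.predict(Xtest)

print(f"Test accuracy: {accuracy_score(ytest, ypred)}")
print(classification_report(ytest, ypred))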