Jacob Jaroya committed on
Commit
dc81b5f
1 Parent(s): ded3157

commit all

Browse files
Files changed (3) hide show
  1. Dockerfile +11 -0
  2. app.py +161 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image providing Python 3.9.
FROM python:3.9

# All following paths are relative to /code inside the image.
WORKDIR /code

# Copy the dependency list first so the install layer can be cached
# independently of application source changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the application source (including app.py) into /code.
COPY . .

# The FastAPI instance `app` is defined in /code/app.py, so the ASGI import
# string is "app:app" (module "app", attribute "app").
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI # Import the FastAPI framework for building APIs
2
+ from typing import List, Literal # Import typing hints for function annotations
3
+ from pydantic import BaseModel # Import BaseModel for creating data models
4
+ import uvicorn # Import uvicorn for running the FastAPI app
5
+ import pandas as pd # Import pandas library for data manipulation
6
+ import pickle, os # Import pickle and os modules for handling files and data serialization
7
+
8
+ # Define a function to load machine learning components
9
def load_ml_components(fp):
    """Load a pickled machine-learning object from disk for re-use in the app.

    Args:
        fp: Path of the pickle file to read.

    Returns:
        The object that was stored in the pickle file.

    Exceptions raised by ``open`` or ``pickle.load`` propagate unchanged.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that come
    # from a trusted source.
    with open(fp, 'rb') as f:
        # Return directly instead of binding to a local named `object`,
        # which would shadow the builtin.
        return pickle.load(f)
14
+
15
+ # Define a Pydantic model for the input data
16
class Sepsis(BaseModel):
    """Input schema describing one individual for the sepsis prediction model.

    Fields:
        PlasmaGlucose: plasma glucose level of the individual.
        BloodWorkResult_1: result of blood work test 1.
        BloodPressure: blood pressure reading of the individual.
        BloodWorkResult_2: result of blood work test 2.
        BloodWorkResult_3: result of blood work test 3.
        BodyMassIndex: body mass index of the individual.
        BloodWorkResult_4: result of blood work test 4.
        Age: age of the individual.

    The target feature 'sepsis' is not part of this schema; it takes the
    values 0 = Negative and 1 = Positive.
    """

    # Input features, in the order the model documentation lists them.
    PlasmaGlucose: int
    BloodWorkResult_1: int
    BloodPressure: int
    BloodWorkResult_2: int
    BloodWorkResult_3: int
    BodyMassIndex: float
    BloodWorkResult_4: float
    Age: int
42
+
43
# Setup
#
# Resolve the directory that contains this script so the model file can be
# located relative to the script's own location rather than the current
# working directory.
DIRPATH = os.path.dirname(os.path.realpath(__file__))

# Full path of the pickled model file that sits next to this script.
ml_core_fp = os.path.join(DIRPATH, 'gradient_boosting_model.pkl')

# Class labels are defined manually; the position in the list is the class index.
labels = ['Negative', 'Positive']

# Lookup table from class index to its label.
idx_to_labels = dict(enumerate(labels))

# Load the pickled pipeline from disk.
end2end_pipeline = load_ml_components(fp=ml_core_fp)

# Keep only the step named 'model' from the pipeline.
# NOTE(review): any other pipeline steps are not reachable through `model`;
# confirm that predicting with this step alone is intended.
model = end2end_pipeline.named_steps['model']

# Startup diagnostics: labels, index mapping and the loaded model.
print(f'\n[Info]Predictable labels: {labels}')
print(f'\n[Info]Indices to labels: {idx_to_labels}')
print(f'\n[Info]ML components loaded - Model: {model}')

# FastAPI application instance that the route decorators attach to.
app = FastAPI(title='Sepsis Prediction API')
77
+
78
+ # Define a route to handle the root endpoint
79
@app.get('/')
def root():
    """Return a short description of the API for the root endpoint."""
    description = "Sepsis Prediction API: This interface is about the prediction of sepsis disease of patients in ICU."
    return {"info": description}
84
+
85
+
86
+ # Define a route to handle the prediction
87
@app.post('/classify')
def sepsis_classification(PlasmaGlucose: int,
                          BloodWorkResult_1: int,
                          BloodPressure: int,
                          BloodWorkResult_2: int,
                          BloodWorkResult_3: int,
                          BodyMassIndex: float,
                          BloodWorkResult_4: float,
                          Age: int):
    """Classify one patient as sepsis Negative or Positive.

    The eight inputs are declared as plain scalar function parameters
    (FastAPI reads such parameters from the query string).

    Returns:
        A dict with three keys:
        - "Execution_msg": human-readable status message.
        - "execution_code": 1 on success, 0 if any exception occurred.
        - "prediction": on success, a one-element list holding a record with
          the input columns, 'Predicted label' and one 'Confidence_<label>'
          percentage per label; None on failure.

    Any exception raised while building the frame or predicting is caught,
    printed, and reported through the response instead of being re-raised.
    """
    # Symbols used to decorate the console output.
    red_x = "\u274C"
    green_checkmark = "\033[32m" + "\u2713" + "\033[0m"

    try:
        # Wrap the dict in a list so pandas builds a one-row frame instead of
        # complaining about scalar values without an index.
        df = pd.DataFrame([{
            'PlasmaGlucose': PlasmaGlucose,
            'BloodWorkResult_1(U/ml)': BloodWorkResult_1,
            'BloodPressure(mm Hg)': BloodPressure,
            'BloodWorkResult_2(mm)': BloodWorkResult_2,
            'BloodWorkResult_3(U/ml)': BloodWorkResult_3,
            'BodyMassIndex(kg/m)^2': BodyMassIndex,
            'BloodWorkResult_4(U/ml)': BloodWorkResult_4,
            'Age (years)': Age,
        }])
        print(f'[Info]Input data as dataframe:\n{df.to_markdown()}')

        # Predict the class and the per-class probabilities.
        output = model.predict(df)
        confidence_scores = model.predict_proba(df)
        print(f'Considering the best confidence score, the output is: {output}')
        print(f'Confidence scores: {confidence_scores}')

        # Store the predicted index, then replace it by the matching label.
        df['Predicted label'] = output
        df['Predicted label'] = df['Predicted label'].replace(idx_to_labels)

        # Strict lookup: an unknown class index raises KeyError here and is
        # reported through the error path below.
        predicted_labels = [idx_to_labels[idx] for idx in output]

        # One confidence column per label, expressed as a percentage.
        for i, label in enumerate(labels):
            df[f'Confidence_{label}'] = confidence_scores[:, i] * 100

        # Report the first (only) prediction with its highest class probability.
        if predicted_labels:
            label = predicted_labels[0]
            confidence_score_percentage = max(confidence_scores[0]) * 100
            print(f"{green_checkmark} This patient in ICU has been classified as Sepsis {label} with confidence of: {confidence_score_percentage:.1f}%")

        msg = "Execution went fine"
        code = 1
        pred = df.to_dict("records")

    except Exception as e:
        # Reset the colour at the end so the red does not leak into
        # subsequent console output.
        print(f"\033[91m{red_x} An exception occurred: {e}\033[0m")
        msg = "Execution did not go well"
        code = 0
        pred = None

    # Create the API response.
    return {"Execution_msg": msg, "execution_code": code, "prediction": pred}
158
+
159
+ # Run the FastAPI application using uvicorn
160
if __name__ == "__main__":
    # The import string must name this module (app.py) and its FastAPI
    # instance `app`. An import string (rather than the object itself) is
    # used because reload=True needs to re-import the application.
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ pytest
2
+ scikit-learn
3
+ fastapi[all]
4
+ pydantic
5
+ uvicorn
6
+ pypi-json
7
+ requests
8
+ pandas
9
+ tabulate