# Priyanka-Kumavat-At-TE's picture
# Upload 19 files
# 2fc2c1f
#!/usr/local/bin/python3
# avenir-python: Machine Learning
# Author: Pranab Ghosh
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You may
# obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
# Package imports
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import sklearn as sk
import matplotlib
import random
import jprops
from io import StringIO
from sklearn.model_selection import cross_val_score
import joblib
from random import randint
from io import StringIO
from sklearn.linear_model import LinearRegression
sys.path.append(os.path.abspath("../lib"))
from util import *
from mlutil import *
from pasearch import *
class BaseRegressor(object):
    """
    Base class for configuration driven regression models.

    Sub classes are expected to provide buildModel(), which assigns an
    unfitted estimator to self.regressor. This class takes care of loading
    data, training, validating, predicting and saving / loading the model.
    """

    def __init__(self, configFile, defValues):
        """
        initializer

        Parameters:
            configFile : configuration file path, handed to Configuration
            defValues : dictionary of parameter name -> 2-tuple supplied by the
                sub class. The common.* and train.data.* entries below are added
                to it in place. (The second tuple element looks like the error
                message for a missing mandatory value; confirm in Configuration.)
        """
        defValues["common.mode"] = ("train", None)
        defValues["common.model.directory"] = ("model", None)
        defValues["common.model.file"] = (None, None)
        defValues["common.scale.file.path"] = (None, "missing scale file path")
        defValues["common.preprocessing"] = (None, None)
        defValues["common.verbose"] = (False, None)
        defValues["train.data.file"] = (None, "missing training data file")
        defValues["train.data.fields"] = (None, "missing training data field ordinals")
        defValues["train.data.feature.fields"] = (None, "missing training data feature field ordinals")
        defValues["train.data.out.field"] = (None, "missing out field ordinal")

        self.config = Configuration(configFile, defValues)
        self.featData = None
        self.outData = None
        self.regressor = None
        self.verbose = self.config.getBooleanConfig("common.verbose")[0]
        # the mode is a string such as "train", so it must be read as a string
        # and not as a boolean
        self.mode = self.config.getStringConfig("common.mode")[0]

        logFilePath = self.config.getStringConfig("common.logging.file")[0]
        logLevName = self.config.getStringConfig("common.logging.level")[0]
        self.logger = createLogger(__name__, logFilePath, logLevName)
        self.logger.info("********* starting session")

    def initConfig(self, configFile, defValues):
        """
        initialize config, replacing the current configuration object
        """
        self.config = Configuration(configFile, defValues)

    def getConfig(self):
        """
        get config object
        """
        return self.config

    def setConfigParam(self, name, value):
        """
        set config param
        """
        self.config.setParam(name, value)

    def getMode(self):
        """
        get mode
        """
        return self.mode

    def getModelFilePath(self):
        """
        get the path of the saved model file

        Built from common.model.directory and common.model.file.

        Returns:
            model file path
        Raises:
            ValueError : if common.model.file is not set
        """
        modelDirectory = self.config.getStringConfig("common.model.directory")[0]
        modelFile = self.config.getStringConfig("common.model.file")[0]
        if modelFile is None:
            raise ValueError("missing model file name")
        if not modelDirectory:
            return modelFile
        return os.path.join(modelDirectory, modelFile)

    def train(self):
        """
        train model

        Builds the model through buildModel(), fits it on the training data
        and optionally saves it (train.model.save).

        Returns:
            tuple (score from the regressor's score() on the training data,
            intercept, coefficients)
        """
        # build model
        self.buildModel()

        # training data, loaded once and kept on the instance for later calls
        if self.featData is None:
            (featData, outData) = self.prepData("train")
            (self.featData, self.outData) = (featData, outData)
        else:
            (featData, outData) = (self.featData, self.outData)

        # parameters
        modelSave = self.config.getBooleanConfig("train.model.save")[0]

        # train
        self.logger.info("...training model")
        self.regressor.fit(featData, outData)
        rsqScore = self.regressor.score(featData, outData)
        coef = self.regressor.coef_
        intc = self.regressor.intercept_
        result = (rsqScore, intc, coef)

        if modelSave:
            self.logger.info("...saving model")
            modelFilePath = self.getModelFilePath()
            joblib.dump(self.regressor, modelFilePath)
        return result

    def validate(self):
        """
        validate model with the validation data

        Returns:
            tuple (predicted output, score from the regressor's score() on the
            validation data)
        """
        # create model
        self.prepModel()

        # prepare test data
        (featData, outDataActual) = self.prepData("validate")

        # predict
        self.logger.info("...predicting")
        outDataPred = self.regressor.predict(featData)

        # error
        rsqScore = self.regressor.score(featData, outDataActual)
        result = (outDataPred, rsqScore)
        return result

    def predict(self):
        """
        predict using trained model

        Returns:
            predicted output for the prediction data
        """
        # create model
        self.prepModel()

        # prepare test data
        featData = self.prepData("predict")[0]

        # predict
        self.logger.info("...predicting")
        outData = self.regressor.predict(featData)
        return outData

    def prepData(self, mode):
        """
        loads and prepares data for training, validation and prediction

        Parameters:
            mode : "train", "validate" or "predict"; used as the prefix of the
                <mode>.data.* configuration keys
        Returns:
            tuple (feature data, output data); output data is None when mode
            is "predict"
        """
        # parameters
        key = mode + ".data.file"
        dataFile = self.config.getStringConfig(key)[0]

        key = mode + ".data.fields"
        fieldIndices = self.config.getStringConfig(key)[0]
        if fieldIndices is not None:
            fieldIndices = strToIntArray(fieldIndices, ",")

        key = mode + ".data.feature.fields"
        featFieldIndices = self.config.getStringConfig(key)[0]
        if featFieldIndices is not None:
            featFieldIndices = strToIntArray(featFieldIndices, ",")

        # there is no output column when predicting
        if not mode == "predict":
            key = mode + ".data.out.field"
            outFieldIndex = self.config.getIntConfig(key)[0]

        # load data
        (data, featData) = loadDataFile(dataFile, ",", fieldIndices, featFieldIndices)
        if (self.config.getStringConfig("common.preprocessing")[0] == "scale"):
            featData = sk.preprocessing.scale(featData)

        outData = None
        if not mode == "predict":
            outData = extrColumns(data, outFieldIndex)
        return (featData, outData)

    def prepModel(self):
        """
        load saved model or train model

        With predict.use.saved.model set, the saved model is loaded on first
        use and then kept; an already loaded model is not retrained. Otherwise
        the model is trained.
        """
        useSavedModel = self.config.getBooleanConfig("predict.use.saved.model")[0]
        if useSavedModel:
            if self.regressor is None:
                # load saved model
                self.logger.info("...loading saved model")
                modelFilePath = self.getModelFilePath()
                self.regressor = joblib.load(modelFilePath)
        else:
            # train model
            self.train()
class LinearRegressor(BaseRegressor):
    """
    linear regression, backed by scikit-learn LinearRegression
    """

    def __init__(self, configFile):
        """
        initializer

        Parameters:
            configFile : configuration file path, handed to the base class
        """
        defValues = {}
        defValues["train.normalize"] = (False, None)
        super(LinearRegressor, self).__init__(configFile, defValues)

    def buildModel(self):
        """
        builds model object

        Raises:
            ValueError : if train.normalize is enabled but the installed
                scikit-learn LinearRegression has no normalize option
        """
        import inspect

        self.logger.info("...building linear regression model")
        normalize = self.config.getBooleanConfig("train.normalize")[0]

        # newer scikit-learn releases dropped the normalize keyword; pass it
        # only when the estimator still accepts it
        params = {}
        if "normalize" in inspect.signature(LinearRegression.__init__).parameters:
            params["normalize"] = normalize
        elif normalize:
            raise ValueError("train.normalize is not supported by the installed scikit-learn version")
        self.regressor = LinearRegression(**params)
class ElasticNetRegressor(BaseRegressor):
    """
    elastic net regression, backed by scikit-learn ElasticNet
    """

    def __init__(self, configFile):
        """
        initializer

        Parameters:
            configFile : configuration file path, handed to the base class
        """
        defValues = {}
        defValues["train.alpha"] = (1.0, None)
        defValues["train.loneratio"] = (0.5, None)
        defValues["train.normalize"] = (False, None)
        defValues["train.precompute"] = (False, None)
        defValues["train.max.iter"] = (1000, None)
        defValues["train.tol"] = (0.0001, None)
        defValues["train.random.state"] = (None, None)
        defValues["train.selection"] = ("cyclic", None)
        super(ElasticNetRegressor, self).__init__(configFile, defValues)

    def buildModel(self):
        """
        builds model object

        Raises:
            ValueError : if train.normalize is enabled but the installed
                scikit-learn ElasticNet has no normalize option
        """
        import inspect
        # imported here because the module only imports LinearRegression
        from sklearn.linear_model import ElasticNet

        self.logger.info("...building elastic net regression model")
        alpha = self.config.getFloatConfig("train.alpha")[0]
        loneratio = self.config.getFloatConfig("train.loneratio")[0]
        normalize = self.config.getBooleanConfig("train.normalize")[0]
        precompute = self.config.getBooleanConfig("train.precompute")[0]
        maxIter = self.config.getIntConfig("train.max.iter")[0]
        tol = self.config.getFloatConfig("train.tol")[0]
        randState = self.config.getIntConfig("train.random.state")[0]
        # selection is a string ("cyclic" by default), so read it as a string
        selection = self.config.getStringConfig("train.selection")[0]

        params = {
            "alpha": alpha,
            "l1_ratio": loneratio,
            "precompute": precompute,
            "max_iter": maxIter,
            "tol": tol,
            "random_state": randState,
            "selection": selection,
        }
        # newer scikit-learn releases dropped the normalize keyword; pass it
        # only when the estimator still accepts it
        if "normalize" in inspect.signature(ElasticNet.__init__).parameters:
            params["normalize"] = normalize
        elif normalize:
            raise ValueError("train.normalize is not supported by the installed scikit-learn version")
        self.regressor = ElasticNet(**params)