# dash-asg / ASG.API / ModelTEC.py
# (Hugging Face page-header residue from "Update ASG.API/ModelTEC.py",
#  commit 30ae555, user wasmdashai — preserved as a comment so the file parses.)
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 17 11:27:28 2022
@author: anas
"""
import pickle
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import regex as re
# import spacy
# from spacy import displacy
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score,f1_score, precision_score, recall_score
#from googletrans import Translator
import requests
class ModelPums:
    """Plain data holder bundling the corpus, vectorizer, classifiers and labels.

    Every attribute starts as None; TEC.loadmodel() fills them in from the
    pickled model bundle.
    """

    # obData: training corpus, obVec: TF-IDF vectorizer, obSVML/obSVMK/oblogict:
    # fitted classifiers, X/Y: feature matrix and targets, Classes: label list.
    _SLOTS = ('obData', 'obVec', 'obSVML', 'obSVMK', 'oblogict', 'X', 'Y', 'Classes')

    def __init__(self):
        for attr in ModelPums._SLOTS:
            setattr(self, attr, None)
class TEC:
    """Technique text classifier wrapping pickled sklearn models.

    Cleans input text (remote cleaning API with a local regex fallback),
    vectorizes it with TF-IDF and predicts a technique label using a linear
    SVM, a kernel SVM or a logistic-regression model.
    """
    # Template of per-model output slots; createOuts() hands out a fresh copy
    # with empty lists. Name-mangled to _TEC__Outs inside the class.
    __Outs={'svmL':None,'svmK':None,'logstick':None,'target':None}
def createOuts():
ob=TEC.__Outs.copy()
for key in ob:
ob[key]=[]
return ob
def __init__(self,typemodel="svmL",model_setting=None,spt=0.3,nlp=None):
self.model_setting=model_setting
self.isLoad=False
self.nlp=nlp
def toclean(self,txt):
try:
url="https://ansaltwyl256.pythonanywhere.com/api/nlp/"+txt
response = requests.get(url)
return response.json()['description']
except:
return "$"
def loadmodel(self):
mm=ModelPums()
matrck=pickle.load(open(self.model_setting.path_model,'rb'))
self.DES=pickle.load(open(self.model_setting.path_Qwords,'rb'))
mm.obData=matrck['obData']
mm.obVec=TfidfVectorizer(norm='l2')
mm.obVec.fit(mm.obData)
# mm.obVec=matrck['obVec']
mm.obSVML=matrck['obSVML']
mm.obSVMK=matrck['obSVMK']
mm.oblogict=matrck['oblogict']
mm.X=matrck['X']
mm.Y=matrck['Y']
mm.Classes=matrck['Classes']
self.obMP=mm
if self.model_setting.path_Qwords!="":
self.Qwords={}#pickle.load(open(model_setting.path_Qwords,'rb'))
else:
self.Qwords={}
#self.detector = Translator()
# self.Splits(spt)
self.Model=self.obMP.obSVML
self.name=self.model_setting.name
self.pipeline= None
self.isLoad=True
def getLables(self):
return self.obMP.Classes
def is_found(self,words,ob):
sms=[]
ob=ob.lower().strip()
for w in words:
if w==ob:
return 1,w
sms.append(self.similarity(w,ob))
index=np.argmax(sms)
if sms[index]>0.7:
return 2, list(words)[index]
return 0,''
def is_found_K(self,words,ob):
ob=ob.lower().strip()
for w in words:
if w==ob:
return 1,w
return -1,''
def Training(self):
self.obMP.obSVMK.fit(self.obMP.X,self.obMP.Y)
def clean_dataT(self,data,typw='',is_input=False):
d,_=self.clean_dataAPI(data)
return d
datac=data
dock=[]
is_found= self.is_found if is_input==True else self.is_found_K
reg=r"([0-9])|(@\[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"
for datac in data:
strr=str(datac)
strr=re.sub(reg, "", strr)
doc=self.nlp(strr)
disc=""
for token in doc:
if not token.is_punct and not token.is_stop and not token.is_space and token.is_alpha:
if token.pos_=='ADJ' or token.pos_=='NOUN' or token.pos_=='VERB':#token.pos_== typw:
qk,key=is_found(self.Qwords,token.lemma_)
if qk==1:
disc=disc+self.Qwords[token.lemma_]+" "
elif qk==2:
disc=disc+self.Qwords[key]+" "
elif qk==-1:
disc=disc+token.lemma_+" "
disc=disc.lower().strip()
if len(disc)>0:
dock.append(disc)
return dock
def similarity(self,ob1,ob2):
ob1=ob1
ob2=ob2
nob1=self.nlp(ob1)
return nob1.similarity(self.nlp(ob2))
#-----------------#
def clean_data(self,data,typw='',is_input=False):
return self.clean_dataAPI(data)
datac=data
dock=[]
labels=[]
is_found= self.is_found if is_input==True else self.is_found_K
reg=r"([0-9])|(@\[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"
for (datac,label) in data:
strr=str(datac)
strr=re.sub(reg, "", strr)
doc=self.nlp(strr)
disc=""
for token in doc:
if not token.is_punct and not token.is_stop and not token.is_space and token.is_alpha:
if True: # token.pos_=='ADJ' or token.pos_=='NOUN' or token.pos_=='VERB' :
# disc=disc+token.lemma_+" "
qk,key=is_found(self.Qwords,token.lemma_)
# print(qk,key)
if qk==1:
disc=disc+self.Qwords[key]+" "
elif qk==2:
disc=disc+self.Qwords[key]+" "
elif qk==-1:
disc=disc+token.lemma_+" "
if len(disc)>2:
dock.append(disc.strip())
labels.append(label)
return dock,labels
#-----------------#
def clean_dataAPI(self,data,typw='',is_input=False):
txt=self.toclean(data)
if txt !="$":
return [txt],6
reg=r"([0-9])|(@\[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"
strr=str(data)
strr=re.sub(reg, "", strr)
return [strr],6
def setPipeline(self,model=None):
self.pipeline=model
def to_tran(self,text,dest='en'):
ff=True
c=0
while ff:
try:
t=self.detector.translate(str(text),dest=dest)
ff=False
text=t.text
except:
c+=1
if c==20:
ff=False
print(' no connenet Tr')
return text
def Predict_ALL(self,description,istrans=False):
if istrans:
description=self.to_tran(description)
if self.pipeline is not None:
text_output,_,outs=self.pipeline.Predict_ALL(description)
for key in outs:
text_output=text_output+'--'+ outs[key]
else:
text_output=''
mx,outs=self.get_ptedict_proba(description+' '+text_output)
return text_output,mx,outs
def predictAPI(self,description,is_input=False,istrans=False):
if istrans:
description=self.to_tran(description)
clean_description,_=self.clean_dataAPI(description,'',is_input=is_input)
try:
features=self.obMP.obVec.transform(clean_description)
yp=self.Model.predict(features)
txttec=self.obMP.Classes[yp[0]]
dis=self.DES[txttec]
except:
txttec='No'
dis=" ....."
return txttec,dis
def predict(self,description,is_input=False,istrans=False):
if istrans:
description=self.to_tran(description)
clean_description,_=self.clean_data([(description,' ')],'',is_input=is_input)
try:
features=self.obMP.obVec.transform(clean_description)
yp=self.Model.predict(features)
txttec=self.obMP.Classes[yp[0]]
except:
txttec='No'
return txttec
def predict_ids(self,description,is_input=False,istrans=False):
if istrans:
description=self.to_tran(description)
# clean_description,_=self.clean_data([(description,' ')],'',is_input=is_input)
try:
features=self.obMP.obVec.transform(description)
yp=self.Model.predict(features)
# txttec=self.obMP.Classes[yp[0]]
except:
yp='No'
return yp
def ptedict_proba(self,description,mx=[],is_input=False,istrans=False):
if istrans:
description=self.to_tran(description)
clean_description,_=self.clean_data([(description,' ')],'',is_input=is_input)
try:
features=self.obMP.obVec.transform(clean_description)
mx=self.Model.predict_proba(features)
print(np.int16(mx*100))
yp=np.argmax(mx)
print(yp)
txttec='Technique : '+ self.obMP.Classes[yp]
except:
txttec='No Found technique ...! (^_^)'
return txttec
def get_ptedict_proba(self,description,mx=[]):
clean_description,_=self.clean_data([(description,' ')],'')
try:
features=self.obMP.obVec.transform(clean_description)
mx=self.obMP.obSVML.predict_proba(features)
yk=self.obMP.obSVMK.predict(features)
# yp=np.argmax(mx)
outputs={'svmK':self.obMP.Classes[yk[0]]}
except:
txttec='No Found technique ...! (^_^)'
outputs={}
return mx,outputs
def get_ptedict_threemodel(self,description):
clean_description,_=self.clean_data([(description,' ')],'')
try:
features=self.obMP.obVec.transform(clean_description)
yl=self.obMP.obSVML.predict(features)
yk=self.obMP.obSVMK.predict(features)
ym=self.obMP.oblogict.predict(features)
outputs={'svmL':self.obMP.Classes[yl[0]],
'svmK':self.obMP.Classes[yk[0]],
'logstick':self.obMP.Classes[ym[0]]
}
except:
print('No Found technique ...! (^_^)' )
outputs={}
return outputs
def verification(self,inputs=[],outputs=[]):
out_prodect=TEC.createOuts()
unprocess=0
meta={"tf":TEC.createOuts(),"num":TEC.createOuts()}
names=list(self.obMP.Classes)
for i in range(len(outputs)):
try:
outs=self.get_ptedict_threemodel(inputs[i])
target=outputs[i].strip()
names.index(target)
outs['target']=target
for key in outs:
out_prodect[key].append(outs[key])
meta['tf'][key].append(int(outs[key]==target))
meta['num'][key].append(names.index(outs[key]))
except :
unprocess+=1
scores={}
for key in meta['num']:
if key!='target':
scores[key]=self.valmodel(meta['num']['target'],meta['num'][key],' model '+key)
return out_prodect,meta,scores
def valmodel(self,y,yp,titel=" "):
print('---------------'+titel+'------------------------' )
cr=classification_report(y, yp)
print(cr)
scores={}
scores['accuracy']=accuracy_score(y, yp)
scores['f1_score']=f1_score(y, yp, average="macro")
scores['precision']=precision_score(y, yp, average="macro")
scores['recall']=recall_score(y, yp, average="macro")
return {'smmray':cr,'scores':scores}
#----------------------
def ChangeModel(self,ob=None):
if ob==None:return
if type(ob) is not str:
self.Model=ob
else :
if ob=='svmL':
self.Model=self.obMP.obSVML
elif ob=='svmK':
self.Model=self.obMP.obSVMK
else:
self.Model=self.obMP.oblogict
#------------------------------
def Search(self):
txt=input('Enter any text :')
print('Technique:'+ self.predict(txt))
    def Info_Models(self):
        """Print dataset shape, class count and per-class sample counts,
        two classes per output line."""
        print('Number Data is ',self.obMP.X.shape)
        print('Number of classes :',len(self.obMP.Classes))
        print ('---------simples -------------------')
        n=len(self.obMP.Classes)
        f=n%2  # 1 when the class count is odd
        n=n-f  # largest even number of classes to pair up
        # Walk the classes two at a time, counting samples of each label in Y.
        for i in range(0,n,2):
            print( (self.obMP.Classes[i], np.sum(np.int16(self.obMP.Y==i))),'------------------',(self.obMP.Classes[i+1], np.sum(np.int16(self.obMP.Y==i+1))))
        # NOTE(review): the leftover odd class is printed via the loop variable
        # after the loop ends; this raises NameError when there are fewer than
        # 2 classes and f == 1 — confirm intended.
        if f==1: print( (self.obMP.Classes[i+2], np.sum(np.int16(self.obMP.Y==i+2))))
def DlistModel(self):
print ('SVC(kernel=\'linear\')-> svmL')
print('LinearSVC(C=1.0) -> svmK ')
print('LogisticRegression() -> logic')