kompiangg committed on
Commit
6460fef
1 Parent(s): ae8ae26
main.py CHANGED
@@ -3,7 +3,7 @@ from type.request.predict import PredictRequest
3
  from type.response.predict import PredictResponse
4
  from hugging_face import model, dataset
5
  from transformer import transformer
6
- from preprocess import preprocess
7
 
8
  hate_speech_model = model.load_hugging_face_model('model_rf.pkl')
9
  hate_speech_dataset = dataset.load_dataset('data_clean.csv')
@@ -21,7 +21,7 @@ def healthz():
21
 
22
  @app.post("/predict")
23
  def predict(request: PredictRequest):
24
- preprocessed_text = preprocess.preprocess(request.predict_text)
25
  predict_text = [preprocessed_text]
26
 
27
  predict_text = tfidf.transform(predict_text)
 
3
  from type.response.predict import PredictResponse
4
  from hugging_face import model, dataset
5
  from transformer import transformer
6
+ from pipeline import pipeline
7
 
8
  hate_speech_model = model.load_hugging_face_model('model_rf.pkl')
9
  hate_speech_dataset = dataset.load_dataset('data_clean.csv')
 
21
 
22
  @app.post("/predict")
23
  def predict(request: PredictRequest):
24
+ preprocessed_text = pipeline.preprocessing(request.predict_text)
25
  predict_text = [preprocessed_text]
26
 
27
  predict_text = tfidf.transform(predict_text)
preprocess/preprocess.py → pipeline/pipeline.py RENAMED
@@ -3,9 +3,10 @@ import pandas as pd
3
  from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
4
  import nltk
5
  from nltk.corpus import stopwords
 
6
 
7
  nltk.download('stopwords')
8
- alay_dict = pd.read_csv('./new_kamusalay.csv', encoding='latin-1', header=None)
9
  alay_dict = alay_dict.rename(columns={0: 'original', 1: 'replacement'})
10
 
11
  factory = StemmerFactory()
@@ -41,7 +42,7 @@ def remove_stopword(text):
41
  def stemming(text):
42
  return stemmer.stem(text)
43
 
44
- def preprocess(text):
45
  text = lowercase(text) # 1
46
  text = remove_nonaplhanumeric(text) # 2
47
  text = remove_unnecessary_char(text) # 2
 
3
  from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
4
  import nltk
5
  from nltk.corpus import stopwords
6
+ import pathlib
7
 
8
  nltk.download('stopwords')
9
+ alay_dict = pd.read_csv(pathlib.Path('new_kamusalay.csv').resolve(), encoding='latin-1', header=None)
10
  alay_dict = alay_dict.rename(columns={0: 'original', 1: 'replacement'})
11
 
12
  factory = StemmerFactory()
 
42
  def stemming(text):
43
  return stemmer.stem(text)
44
 
45
+ def preprocessing(text):
46
  text = lowercase(text) # 1
47
  text = remove_nonaplhanumeric(text) # 2
48
  text = remove_unnecessary_char(text) # 2