blabla
Browse files
main.py
CHANGED
@@ -3,7 +3,7 @@ from type.request.predict import PredictRequest
|
|
3 |
from type.response.predict import PredictResponse
|
4 |
from hugging_face import model, dataset
|
5 |
from transformer import transformer
|
6 |
-
from
|
7 |
|
8 |
hate_speech_model = model.load_hugging_face_model('model_rf.pkl')
|
9 |
hate_speech_dataset = dataset.load_dataset('data_clean.csv')
|
@@ -21,7 +21,7 @@ def healthz():
|
|
21 |
|
22 |
@app.post("/predict")
|
23 |
def predict(request: PredictRequest):
|
24 |
-
preprocessed_text =
|
25 |
predict_text = [preprocessed_text]
|
26 |
|
27 |
predict_text = tfidf.transform(predict_text)
|
|
|
3 |
from type.response.predict import PredictResponse
|
4 |
from hugging_face import model, dataset
|
5 |
from transformer import transformer
|
6 |
+
from pipeline import pipeline
|
7 |
|
8 |
hate_speech_model = model.load_hugging_face_model('model_rf.pkl')
|
9 |
hate_speech_dataset = dataset.load_dataset('data_clean.csv')
|
|
|
21 |
|
22 |
@app.post("/predict")
|
23 |
def predict(request: PredictRequest):
|
24 |
+
preprocessed_text = pipeline.preprocessing(request.predict_text)
|
25 |
predict_text = [preprocessed_text]
|
26 |
|
27 |
predict_text = tfidf.transform(predict_text)
|
preprocess/preprocess.py → pipeline/pipeline.py
RENAMED
@@ -3,9 +3,10 @@ import pandas as pd
|
|
3 |
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
|
4 |
import nltk
|
5 |
from nltk.corpus import stopwords
|
|
|
6 |
|
7 |
nltk.download('stopwords')
|
8 |
-
alay_dict = pd.read_csv('
|
9 |
alay_dict = alay_dict.rename(columns={0: 'original', 1: 'replacement'})
|
10 |
|
11 |
factory = StemmerFactory()
|
@@ -41,7 +42,7 @@ def remove_stopword(text):
|
|
41 |
def stemming(text):
|
42 |
return stemmer.stem(text)
|
43 |
|
44 |
-
def
|
45 |
text = lowercase(text) # 1
|
46 |
text = remove_nonaplhanumeric(text) # 2
|
47 |
text = remove_unnecessary_char(text) # 2
|
|
|
3 |
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
|
4 |
import nltk
|
5 |
from nltk.corpus import stopwords
|
6 |
+
import pathlib
|
7 |
|
8 |
nltk.download('stopwords')
|
9 |
+
alay_dict = pd.read_csv(pathlib.Path('new_kamusalay.csv').resolve(), encoding='latin-1', header=None)
|
10 |
alay_dict = alay_dict.rename(columns={0: 'original', 1: 'replacement'})
|
11 |
|
12 |
factory = StemmerFactory()
|
|
|
42 |
def stemming(text):
|
43 |
return stemmer.stem(text)
|
44 |
|
45 |
+
def preprocessing(text):
|
46 |
text = lowercase(text) # 1
|
47 |
text = remove_nonaplhanumeric(text) # 2
|
48 |
text = remove_unnecessary_char(text) # 2
|