kompiangg committed on
Commit
e316253
1 Parent(s): a2aa6b8

init commit

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv
2
+ __pycache__
3
+ *.csv
hugging_face/dataset.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset as hf_load_dataset
2
+ from pandas import DataFrame, read_csv
3
+
4
REPO_ID = "kompiangg/twitter_hate_speech_classification"


def load_dataset(filename) -> DataFrame:
    """Return the dataset as a DataFrame, using a local CSV as a cache.

    The CSV at ``filename`` is read if possible. Otherwise the file of the
    same name is requested from the ``REPO_ID`` dataset repository, its
    ``train`` split is converted to a DataFrame, and that DataFrame is
    written back to ``filename`` so later calls can read it locally.

    Args:
        filename: Path of the local CSV; the same value is passed as
            ``data_files`` when downloading.

    Returns:
        The loaded DataFrame.
    """
    try:
        return read_csv(filename)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. pandas' EmptyDataError
        # and ParserError (and UnicodeDecodeError) derive from ValueError, so
        # a broken cache file also falls through to a fresh download.
        # Anything else -- notably KeyboardInterrupt / SystemExit, which the
        # previous bare ``except:`` swallowed -- now propagates.
        pass

    splits = hf_load_dataset(REPO_ID, data_files=filename, encoding='latin-1')
    df = DataFrame(data=splits['train'])
    # Persist the download so the next call takes the fast local path.
    df.to_csv(filename, index=False)
    return df
hugging_face/model.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import hf_hub_download
2
+ import joblib
3
+
4
REPO_ID = "kompiangg/svm-hate-speech-classification"


def load_hugging_face_model(filename):
    """Download ``filename`` from the ``REPO_ID`` repository and load it.

    Args:
        filename: Name of the file inside the repository.

    Returns:
        The object produced by ``joblib.load`` on the downloaded file.
    """
    # hf_hub_download returns the path it hands to joblib below.
    local_path = hf_hub_download(REPO_ID, filename)
    # NOTE(review): joblib.load unpickles the file, and unpickling can run
    # arbitrary code. Only point REPO_ID at a repository you trust.
    return joblib.load(local_path)
main.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from type.request.predict import PredictRequest
3
+ from type.response.predict import PredictResponse
4
+ from hugging_face import model, dataset
5
+ from transformer import transformer
6
+
7
+ import sys
8
+
9
# Everything below runs once, at import time: the model and the dataset are
# loaded before the FastAPI application object is created.
hate_speech_model = model.load_hugging_face_model('model_svm.pkl')
hate_speech_dataset = dataset.load_dataset('data_clean.csv')

# Fit the vectorizer on the 'Tweet' column; 'U' is forwarded to create_tfidf,
# which applies it to that column via ``.astype``.
tfidf = transformer.create_tfidf(hate_speech_dataset, 'Tweet', 'U')

app = FastAPI()
14
+
15
# Health-check route: always answers with the same fixed message.
# (Documented with comments on purpose; a docstring would be published as the
# endpoint description.)
@app.get("/healthz")
def healthz():
    status = {"message": "All system running well :)"}
    return status
18
+
19
# Prediction route: vectorizes the submitted text with the module-level
# ``tfidf`` object, runs the module-level ``hate_speech_model`` on it and
# reports whether the predicted label equals 1.
# (Documented with comments on purpose; a docstring would be published as the
# endpoint description.)
@app.post("/predict")
def predict(request: PredictRequest):
    # The vectorizer is given a one-element list holding the request text.
    features = tfidf.transform([request.predict_text])
    prediction = hate_speech_model.predict(features)

    # bool(...) hands the response model a plain Python bool. The comparison
    # presumably yields a NumPy boolean otherwise (assuming ``predict``
    # returns a NumPy array -- TODO confirm), which not every pydantic
    # version is guaranteed to accept for a ``bool`` field.
    return PredictResponse(
        predict_text=request.predict_text,
        is_hate_speech=bool(prediction[0] == 1),
    )
transformer/transformer.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+
3
def create_tfidf(dataset, feature, label):
    """Fit a ``TfidfVectorizer`` on one column of ``dataset``.

    Args:
        dataset: Object indexed as ``dataset[feature]``; the result must
            support ``.astype`` (presumably a pandas DataFrame -- confirm
            against callers).
        feature: Key of the column holding the text to fit on.
        label: Passed unchanged to ``.astype``; despite its name it is used
            as the dtype the column is cast to before fitting.

    Returns:
        The return value of ``TfidfVectorizer.fit`` on the cast column.
    """
    texts = dataset[feature].astype(label)
    return TfidfVectorizer().fit(texts)
type/request/predict.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
# Request model with a single required string field.
# (Comments rather than a docstring, so the generated schema is unchanged.)
class PredictRequest(BaseModel):
    # The text submitted for prediction.
    predict_text: str
type/response/predict.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
# Response model pairing a text with a boolean verdict.
# (Comments rather than a docstring, so the generated schema is unchanged.)
class PredictResponse(BaseModel):
    # The text the verdict refers to.
    predict_text: str
    # Boolean flag carrying the hate-speech verdict for ``predict_text``.
    is_hate_speech: bool