init commit
Browse files- .gitignore +3 -0
- hugging_face/dataset.py +14 -0
- hugging_face/model.py +7 -0
- main.py +29 -0
- transformer/transformer.py +5 -0
- type/request/predict.py +4 -0
- type/response/predict.py +5 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
venv
|
2 |
+
__pycache__
|
3 |
+
*.csv
|
hugging_face/dataset.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import load_dataset as hf_load_dataset
|
2 |
+
from pandas import DataFrame, read_csv
|
3 |
+
|
4 |
+
REPO_ID = "kompiangg/twitter_hate_speech_classification"


def load_dataset(filename) -> DataFrame:
    """Load the tweet dataset from a local CSV, falling back to the HF hub.

    Tries to read ``filename`` from disk first (local cache). If the file is
    missing or unreadable as CSV, downloads it from the Hugging Face dataset
    repo, writes it back to ``filename`` so subsequent calls hit the cache,
    and returns it.

    Parameters
    ----------
    filename : str
        Path of the local CSV; also the ``data_files`` name in the hub repo.

    Returns
    -------
    DataFrame
        The dataset as a pandas DataFrame.
    """
    try:
        df = read_csv(filename)
    except (OSError, ValueError):
        # OSError: file missing/unreadable; ValueError covers pandas parse
        # errors (EmptyDataError subclasses it). Anything else should surface
        # instead of being silently swallowed by a bare `except:`.
        datasets = hf_load_dataset(REPO_ID, data_files=filename, encoding='latin-1')
        df = DataFrame(data=datasets['train'])
        # Persist locally so the next call takes the fast path.
        df.to_csv(filename, index=False)

    return df
|
hugging_face/model.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import hf_hub_download
|
2 |
+
import joblib
|
3 |
+
|
4 |
+
REPO_ID = "kompiangg/svm-hate-speech-classification"


def load_hugging_face_model(filename):
    """Download *filename* from the model repo and deserialize it with joblib.

    ``hf_hub_download`` caches the artifact locally, so repeated calls do not
    re-download the file.
    """
    model_path = hf_hub_download(REPO_ID, filename)
    return joblib.load(model_path)
|
main.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
from type.request.predict import PredictRequest
from type.response.predict import PredictResponse
from hugging_face import model, dataset
from transformer import transformer

# NOTE(review): `sys` appears unused in this module — confirm before removing.
import sys

# All heavy artifacts are loaded once at import time, before the app is
# created, so every request handler reuses the same model/vectorizer.
hate_speech_model = model.load_hugging_face_model('model_svm.pkl')
hate_speech_dataset = dataset.load_dataset('data_clean.csv')
# Fit TF-IDF on the 'Tweet' column; 'U' casts the column to unicode strings.
tfidf = transformer.create_tfidf(hate_speech_dataset, 'Tweet', 'U')

app = FastAPI()
|
14 |
+
|
15 |
+
@app.get("/healthz")
def healthz():
    """Liveness probe: reports that the service is up."""
    status = {"message": "All system running well :)"}
    return status
|
18 |
+
|
19 |
+
@app.post("/predict")
def predict(request: PredictRequest):
    """Classify a single tweet as hate speech or not.

    Vectorizes the request text with the module-level fitted TF-IDF
    transformer, runs the SVM model on it, and maps the model's label
    (1 == hate speech) onto the response schema.
    """
    features = tfidf.transform([request.predict_text])
    labels = hate_speech_model.predict(features)

    return PredictResponse(
        predict_text=request.predict_text,
        is_hate_speech=labels[0] == 1,
    )
|
transformer/transformer.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
2 |
+
|
3 |
+
def create_tfidf(dataset, feature, label):
    """Fit a TF-IDF vectorizer on one column of *dataset*.

    ``feature`` names the column; ``label`` is (despite the name) the numpy
    dtype string the column is cast to before fitting, e.g. ``'U'`` for
    unicode. Returns the fitted vectorizer.
    """
    vectorizer = TfidfVectorizer()
    corpus = dataset[feature].astype(label)
    return vectorizer.fit(corpus)
|
type/request/predict.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
|
3 |
+
class PredictRequest(BaseModel):
    """Request body for POST /predict: the raw text to classify."""
    predict_text: str
|
type/response/predict.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
|
3 |
+
class PredictResponse(BaseModel):
    """Response body for POST /predict: echoes the input text and the verdict."""
    predict_text: str
    is_hate_speech: bool
|