"""Train a logistic-regression text classifier on ``hate_speech_detection.csv``.

The script reads the ``raw_data`` column (text) as features and the ``class``
column as the target, converts the text into token-count vectors with
``CountVectorizer``, holds out 40% of the rows as a test set, fits a
``LogisticRegression`` model on the remaining rows and predicts labels for the
held-out rows.

Module-level results left for later code: ``cv`` (fitted vectorizer),
``model`` (fitted classifier), the train/test splits and ``y_pred``.
"""

import warnings

import azure.cognitiveservices.speech as speechsdk
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# SECURITY NOTE(review): this credential is committed in plain text. It should
# be treated as leaked: rotate it and load the replacement from configuration
# outside the source tree (e.g. an environment variable). The value is left
# untouched here so that any code relying on `api_key` keeps working.
# Neither `api_key` nor `endpoint` is used in the visible part of the script;
# presumably they configure the Azure speech SDK further down — confirm.
api_key = '9283c8042f454c4d8638fca0484512ab'
endpoint = 'https://centralindia.api.cognitive.microsoft.com/sts/v1.0/issuetoken'

# Load the dataset; the path is relative to the current working directory.
data = pd.read_csv('hate_speech_detection.csv')
X = data['raw_data']
y = data['class']

# Learn the vocabulary from the whole text column and encode every row as a
# vector of token counts.
cv = CountVectorizer()
X_ = cv.fit_transform(X)
# fit_transform returns a sparse matrix; convert it to a dense array.
# NOTE(review): LogisticRegression also accepts sparse input, so this step
# could presumably be dropped to save memory on a large vocabulary — kept so
# `X_`, `X_train` and `X_test` stay dense arrays for any later code.
X_ = X_.toarray()

# 60/40 train/test split. The seed was originally written as
# `random_state=True`; bool is an int subclass, so that resolved to seed 1.
# It is spelled out explicitly here and selects the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X_, y, test_size=0.4, random_state=1
)

# max_iter is raised above the library default to give the solver more
# iterations to converge on the high-dimensional count features.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)