hsd / model.py
Amulya M
m3
9e9e397
raw
history blame contribute delete
734 Bytes
import os

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Speech to Text
# Credentials for the Azure Cognitive Services token endpoint. Each value can
# be overridden through the environment (SPEECH_KEY / SPEECH_ENDPOINT); the
# literals are kept only as backward-compatible fallbacks.
# NOTE(review): the fallback key is committed to source control -- rotate it
# and drop the literal once every deployment sets SPEECH_KEY.
api_key = os.environ.get('SPEECH_KEY', '9283c8042f454c4d8638fca0484512ab')
endpoint = os.environ.get(
    'SPEECH_ENDPOINT',
    'https://centralindia.api.cognitive.microsoft.com/sts/v1.0/issuetoken',
)
import azure.cognitiveservices.speech as speechsdk
# To ignore Warnings
import warnings
# Load the labelled corpus. The CSV must provide a 'raw_data' column (fed to
# the vectorizer, so presumably free text) and a 'class' column (the target).
data = pd.read_csv('hate_speech_detection.csv')
X = data['raw_data']
y = data['class']

# Bag-of-words features: the vectorizer learns its vocabulary from X and
# returns per-document token counts.
cv = CountVectorizer()
X_ = cv.fit_transform(X)
# fit_transform returns a sparse matrix; it is expanded to a dense array here.
# NOTE(review): this can be very memory-hungry for a large vocabulary --
# consider passing the sparse matrix straight to the estimator instead.
X_ = X_.toarray()

# Hold out 40% of the rows for testing. The seed is written as the integer 1,
# which is the value the previous `random_state=True` evaluated to, so the
# split is unchanged.
X_train, X_test, y_train, y_test = train_test_split(
    X_, y, test_size=0.4, random_state=1
)

# max_iter=1000 allows the solver more iterations than scikit-learn's default.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predictions on the held-out rows (kept as a module-level name).
y_pred = model.predict(X_test)