File size: 1,684 Bytes
6421cd0
1cd1d91
ca1edf9
6421cd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca1edf9
 
6421cd0
 
 
 
1a863cf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import streamlit as st
import torch
import transformers

# from datasets import DatasetDict, Dataset

# # Load train, test, and validation JSON files
# train_data = Dataset.from_json('jsonDataTrain.json')
# test_data = Dataset.from_json('jsonDataTest.json')
# validation_data = Dataset.from_json('jsonDataVal.json')

# # Define the features
# features = ['Post', 'defamation', 'hate', 'non-hostile', 'offensive']

# Classification label set, plus the index<->name lookup tables used to
# decode the classifier's output logits back into label names.
labels = ['hate', 'non-hostile', 'defamation', 'offensive']
id2label = dict(enumerate(labels))
label2id = {name: index for index, name in id2label.items()}

from transformers import BertTokenizer, BertForSequenceClassification

# Load the fine-tuned model and tokenizer from a local directory.
# NOTE(review): the path is relative, so the app must be launched from the
# directory that contains "fine_tuned_hindi_bert_model" — confirm the
# deployment working directory.
model_name = "fine_tuned_hindi_bert_model"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)

# Example input text
# input_text = "मैं एक छात्र हूं जो छात्रावास में रहता हूं और दृढ़ संकल्प के साथ अपनी पढ़ाई करता हूं लेकिन मेरा दोस्त मूर्ख है। वह हर समय गेम खेलता है और खाना खाता है।"
# --- Streamlit UI and inference ------------------------------------------
# Render a text box, tokenize the entered text, run it through the
# fine-tuned BERT classifier, and print one score per label.
st.title("Hate Speech Classification Demo")  # fixed typo "Classificsation"

# BUG FIX: the original called the nonexistent st.input_text() and passed
# the *builtin* input function as its argument, crashing on startup.
# The Streamlit API is st.text_input(label).
input_text = st.text_input("Enter text to classify")

# Only run the model once the user has actually typed something —
# tokenizing and classifying an empty string on every rerun is wasted work.
if input_text:
    # Tokenize the input text into model-ready PyTorch tensors.
    inputs = tokenizer(input_text, return_tensors="pt")

    # Inference only: disable gradient tracking to save memory and compute
    # (this was present but commented out in the original).
    with torch.no_grad():
        outputs = model(**inputs)

    # Display the raw logit for each label (higher = more confident).
    # NOTE(review): these are unnormalized logits, not probabilities —
    # apply softmax/sigmoid here if calibrated scores are wanted.
    predicted_class = outputs.logits
    for idx in range(len(predicted_class[0])):
        st.write(id2label[idx], predicted_class[0][idx].item())