saurabhg2083 committed on
Commit
44663cf
1 Parent(s): 2108543
Files changed (1) hide show
  1. app.py +113 -0
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import re
5
+ import string
6
+ import textwrap
7
+ from transformers import BertTokenizer, BertForSequenceClassification, AutoModelForCausalLM, AutoTokenizer, pipeline, AdamW
8
+ from happytransformer import HappyTextToText, TTSettings
9
+ import torch
10
+ from torch.nn import BCEWithLogitsLoss
11
+ from torch.utils.data import DataLoader, TensorDataset, random_split
12
+ from happytransformer import HappyTextToText, TTSettings
13
+
14
+
15
# Module-level model setup. Everything here runs when the script is loaded.

# NOTE(review): the original referenced `save_directory` without ever defining
# it, which raises NameError on start-up. It is assumed to be the same Hub
# repository the pipeline below loads -- confirm, or point it at a local
# checkpoint directory instead.
save_directory = "saurabhg2083/model_bert"

# Text-classification pipeline. Nothing else in this script calls `pipe`.
pipe = pipeline("text-classification", model="saurabhg2083/model_bert")

# T5 grammar-correction model and its generation settings, used to smooth the
# rewritten text before it is shown.
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
args = TTSettings(num_beams=5, min_length=1)

# Classifier and tokenizer consumed by model_eval().
model = BertForSequenceClassification.from_pretrained(save_directory)
tokenizer = BertTokenizer.from_pretrained(save_directory)
20
+
21
+
22
# Terms searched for in the input text by replace_gendered_pronouns().
gendered_pronouns = [
    'ambition', 'driven', 'lead', 'persist', 'principle', 'decision',
    'superior', 'individual', 'assertive', 'strong', 'hierarchical', 'rigid',
    'silicon valley', 'stock options', 'takes risk', 'workforce',
    'autonomous', 'ping pong', 'pool table', 'must', 'competitive', 'he',
    'his', 'himself', 'confident', 'active', 'aggressive', 'ambitious',
    'fearless', 'headstrong', 'defensive', 'independent', 'dominant',
    'outspoken', 'leader', 'fast paced', 'adventurous', 'analytical',
    'decisive', 'determined', 'ninja', 'objective', 'rock star', 'boast',
    'challenging', 'courage', 'thoughtful', 'creative', 'adaptable',
    'choose', 'curious', 'excellent', 'flexible', 'multitasking', 'health',
    'imaginative', 'intuitive', 'leans in', 'plans for the future',
    'resilient', 'self-aware', 'socially responsible', 'trustworthy',
    'shup-to-date', 'wellness program', 'nurture', 'teach', 'dependable',
    'community', 'serving', 'loyal', 'enthusiasm', 'interpersonal',
    'connect', 'commit', 'she', 'agree', 'empathy', 'sensitive',
    'affectionate', 'feel', 'support', 'collaborate', 'honest', 'trust',
    'understand', 'compassion', 'share', 'polite', 'kind', 'caring', 'her',
    'hers', 'herself', 'feminine', 'cheer', 'communal', 'emotional',
    'flatterable', 'gentle', 'interdependent', 'kinship', 'modesty',
    'pleasant', 'polite', 'quiet', 'sympathy', 'warm', 'dominant', 'yield',
    'native english speaker', 'professionally groomed hair', 'native',
    'culture fit', 'non-white', 'clean-shaven', 'neat hairstyle', 'master',
    'slave', 'a cakewalk', 'brownbag session', 'spirit animal',
    'digital native', 'servant leadership', 'tribe', 'oriental', 'spic',
    'english fluency', 'level native', 'illegals', 'eskimo', 'latino',
    'latina', 'migrant', 'blacklist', 'whitelist',
]

# List of neutral words, paired with gendered_pronouns by position.
# NOTE(review): this was previously written out as a second literal that was
# element-for-element identical to gendered_pronouns, so it is built as a copy
# here. With identical lists every term maps to itself; fill in real neutral
# alternatives to make the rewrite step change anything beyond letter case.
neutral_words = list(gendered_pronouns)
60
+
61
+
62
+
63
def replace_gendered_pronouns(text):
    """Replace each term from ``gendered_pronouns`` found in *text*.

    Terms are paired by position with ``neutral_words``. Matching is
    case-insensitive and restricted to whole words/phrases via ``\\b``.

    All terms are substituted in a single pass with longer phrases tried
    first. Substituting term by term would let the output of one replacement
    be matched again by a later term, and would let a short term such as
    'native' be rewritten before a longer phrase containing it, such as
    'digital native', had a chance to match.

    Args:
        text: The string to rewrite.

    Returns:
        The rewritten string; *text* unchanged when there are no terms.
    """
    # Lower-case the keys so a case-insensitive match can be looked up directly.
    word_dict = {
        term.lower(): neutral
        for term, neutral in zip(gendered_pronouns, neutral_words)
        if term
    }
    if not word_dict:
        # An empty alternation would match at every word boundary.
        return text

    # Longest first: regex alternation picks the first branch that matches.
    ordered_terms = sorted(word_dict, key=len, reverse=True)
    pattern = re.compile(
        r'\b(?:' + '|'.join(re.escape(term) for term in ordered_terms) + r')\b',
        flags=re.IGNORECASE,
    )

    def _swap(match):
        found = match.group(0)
        # Fall back to the matched text if case-folding quirks miss the key.
        return word_dict.get(found.lower(), found)

    return pattern.sub(_swap, text)
74
+
75
def model_eval(text):
    """Classify *text* with the module-level ``model`` and return the label id.

    The text is tokenized with the module-level ``tokenizer`` (truncated and
    padded to 512 tokens) and run through the model without gradient tracking.

    Args:
        text: The string to classify.

    Returns:
        An ``int`` label. For a single-logit head this is 1 when the logit is
        positive and 0 otherwise; for a multi-logit head it is the index of
        the largest logit.
    """
    # Put the model in evaluation mode
    model.eval()

    # Tokenize the input text
    encoded = tokenizer(text, padding='max_length', truncation=True, max_length=512, return_tensors="pt")

    # Make the prediction
    with torch.no_grad():
        logits = model(**encoded).logits

    if logits.shape[-1] == 1:
        # Single-logit head: threshold the raw logit at zero.
        return (logits > 0).int().item()

    # Multi-logit head: `.item()` on the thresholded tensor would raise because
    # it has more than one element, so take the arg-max class instead.
    return int(logits.argmax(dim=-1).item())
93
+
94
+
95
# Streamlit page: classify the entered text and, when it is flagged, show a
# rewritten and grammar-corrected version.
st.title("Job Bias Testing")

text1 = st.text_area("Enter Text 1")

if st.button("Calculate Similarity"):
    if text1:
        predicted_label = model_eval(text1)
        # Convert 0 or 1 label back to a meaningful label
        label_mapping = {0: "Negative", 1: "Positive"}
        predicted_label_text = label_mapping[predicted_label]
        if predicted_label_text == "Positive":
            rewritten_sentence = replace_gendered_pronouns(text1)
            # Add the prefix "grammar: " before each input
            result = happy_tt.generate_text("grammar: " + rewritten_sentence, args=args)
            # `{result.text}` previously sat outside the string literal, which
            # is a SyntaxError; it belongs inside the f-string.
            st.success(f"Predicted Label: {predicted_label_text} and new Text is: {result.text}")
        else:
            # Previously a "Negative" prediction displayed nothing at all.
            st.info(f"Predicted Label: {predicted_label_text}")
    else:
        st.warning("Please enter text Job Description.")