|
|
|
import streamlit as st |
|
import transformers |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline as tf_pipeline |
|
import torch |
|
import pandas as pd |
|
|
|
|
|
# Pick the device models are moved to: the GPU when PyTorch reports CUDA as
# available, otherwise the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'
|
|
|
|
|
class ModelLoader:
    """Load Hugging Face tokenizers and models on first use and reuse them.

    Both caches are plain dicts keyed by the model name that is passed to
    ``from_pretrained``.
    """

    def __init__(self):
        # model name -> tokenizer / model instance
        self.tokenizers = {}
        self.models = {}

    def load_tokenizer(self, model_name):
        """Return the tokenizer for ``model_name``, loading it on first request."""
        if model_name not in self.tokenizers:
            self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
        return self.tokenizers[model_name]

    def load_model(self, model_type, model_name):
        """Return the model for ``model_name``, loading it on first request.

        Args:
            model_type: ``"classification"`` (sequence classification head) or
                ``"token_classification"`` (token classification head).
            model_name: Identifier handed to ``from_pretrained``.

        Returns:
            The loaded model, moved to ``torch_device``.

        Raises:
            ValueError: If the model is not cached yet and ``model_type`` is
                not one of the supported values.

        Note:
            The cache is keyed by ``model_name`` only, so ``model_type`` is
            ignored when the name has already been loaded.
        """
        if model_name in self.models:
            return self.models[model_name]

        if model_type == "classification":
            model_cls = AutoModelForSequenceClassification
        elif model_type == "token_classification":
            model_cls = AutoModelForTokenClassification
        else:
            # Previously this fell through and failed with an opaque KeyError
            # on the cache lookup; fail early with an actionable message.
            raise ValueError(
                f"Unsupported model_type {model_type!r}; expected "
                "'classification' or 'token_classification'."
            )

        model = model_cls.from_pretrained(model_name).to(torch_device)
        self.models[model_name] = model
        return model
|
|
|
@st.cache_resource
def _get_model_loader():
    """Create the shared ModelLoader once per server process.

    Streamlit re-executes this script on every widget interaction; without
    caching, a fresh ModelLoader (with empty caches) would be built on each
    rerun and every model would be loaded again.
    """
    return ModelLoader()


model_loader = _get_model_loader()
|
|
|
|
|
class BiasPipeline:
    """Bundle the bias text-classification and bias NER pipelines."""

    def __init__(self, model_loader):
        """Load both models through ``model_loader`` and build their pipelines.

        Args:
            model_loader: Object exposing ``load_tokenizer(model_name)`` and
                ``load_model(model_type, model_name)``.
        """
        self.model_loader = model_loader
        self.classifier_tokenizer = model_loader.load_tokenizer("newsmediabias/UnBIAS-classification-bert")
        self.classifier_model = model_loader.load_model("classification", "newsmediabias/UnBIAS-classification-bert")
        self.ner_tokenizer = model_loader.load_tokenizer("newsmediabias/UnBIAS-NER")
        self.ner_model = model_loader.load_model("token_classification", "newsmediabias/UnBIAS-NER")

        # The pipelines used to be pinned to GPU 0 unconditionally, which
        # cannot work on a CPU-only host. Use the first GPU only when CUDA is
        # available; -1 is the pipeline's integer code for "run on CPU".
        device_index = 0 if torch.cuda.is_available() else -1

        self.classifier = tf_pipeline(
            "text-classification",
            model=self.classifier_model,
            tokenizer=self.classifier_tokenizer,
            device=device_index,
        )
        self.ner = tf_pipeline(
            "ner",
            model=self.ner_model,
            tokenizer=self.ner_tokenizer,
            device=device_index,
        )

    def clean_text(self, text):
        """Clean up the text by removing any redundant spaces.

        Runs of whitespace collapse to a single space and leading/trailing
        whitespace is dropped.
        """
        return ' '.join(text.split())

    def process(self, texts):
        """Process texts to classify and find named entities.

        Args:
            texts: Input forwarded unchanged to both pipelines.

        Returns:
            A ``(classification_results, ner_results)`` tuple holding the raw
            outputs of the two pipelines.
        """
        classification_results = self.classifier(texts)
        ner_results = self.ner(texts)
        return classification_results, ner_results
|
|
|
|
|
@st.cache_resource
def _get_bias_pipeline():
    """Build the BiasPipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the constructed pipeline avoids rebuilding both pipelines each time.
    """
    return BiasPipeline(model_loader)


bias_pipeline = _get_bias_pipeline()
|
|
|
|
|
# ---------------------------------------------------------------------------
# Streamlit UI: choose or type a text, then classify it, list the tokens
# tagged as biased, and show the debiased rewrite.
# ---------------------------------------------------------------------------
st.title('UnBIAS App')

# NOTE(review): `test_sentences` (used below) and `get_debiased_sequence`
# (used in the button handler) are not defined in the visible part of this
# file -- confirm they are defined or imported elsewhere, otherwise these
# lines raise NameError.
selected_sentence = st.selectbox(
    "Choose a pre-loaded sentence to analyze and debias:",
    [""] + test_sentences,
)

input_text = st.text_area("Or enter your own text to analyze and debias:", height=150)

if st.button("Analyze and Debias Text"):
    # A selected pre-loaded sentence takes precedence over the free-form text.
    text_to_process = selected_sentence if selected_sentence else input_text

    # Normalize before the emptiness check so whitespace-only input is treated
    # as "no input" instead of sending an empty string to the models.
    cleaned_text = bias_pipeline.clean_text(text_to_process or "")

    if cleaned_text:
        classification_results, ner_results = bias_pipeline.process(cleaned_text)

        # Only the first classification result is reported.
        label = classification_results[0]['label']
        score = classification_results[0]['score']
        st.write(f"**Classification:** {label} (Confidence: {score:.2f})")

        # Keep only tokens whose NER tag starts with 'B-BIAS'.
        biased_words = [result['word'] for result in ner_results if result['entity'].startswith('B-BIAS')]
        st.write("**Biased Words Identified:**", ", ".join(biased_words))

        debiased_text = get_debiased_sequence(cleaned_text)
        st.write("## Debiased Text:")
        st.write(debiased_text)
    else:
        st.write("Please enter some text to analyze and debias or select a pre-loaded sentence.")

st.info("Disclaimer: Please note that while this tool aims to identify and highlight biased language, no automated system is perfect. The detection of bias depends on various factors, including the context, the training data used for the models, and the inherent limitations of natural language processing technologies. As such, some biases may not be detected, and all results should be reviewed critically by human users.")
|
|