import os
import json
import random

import gradio as gr
import torch
from llama_cpp import Llama
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    AutoModelForMultipleChoice,
)
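
# The app combines three models:
#   1. a sequence classifier that labels free text as BIASED or NEUTRAL,
#   2. a quantized LLaMA-2 chat model (via llama.cpp) that generates scenarios
#      (context, question, three candidate answers),
#   3. a multiple-choice model that picks the most "objective" of the three answers.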

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print("GPU Name:", torch.cuda.get_device_name(0))

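# Classification checkpoints offered in the "Text Analysis" tab, keyed by display name.
# `id2label`/`label2id` fix the label order (0 = BIASED, 1 = NEUTRAL), and `loaded_models`
# caches (model, tokenizer) pairs so each checkpoint is only loaded once.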
MODELS = {
    "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
}
id2label = {0: "BIASED", 1: "NEUTRAL"}
label2id = {"BIASED": 0, "NEUTRAL": 1}
loaded_models = {}


def load_model(model_name: str):
    """Load and cache a sequence classification model for text objectivity analysis."""
    if model_name not in loaded_models:
        try:
            model_path = MODELS[model_name]
            model = AutoModelForSequenceClassification.from_pretrained(
                model_path,
                num_labels=2,
                id2label=id2label,
                label2id=label2id
            ).to(device)
            tokenizer = AutoTokenizer.from_pretrained(model_path)
            loaded_models[model_name] = (model, tokenizer)
            return model, tokenizer
        except Exception as e:
            return f"Error loading model: {str(e)}"
    return loaded_models[model_name]

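# analyze_text returns a {label: probability} mapping (rendered by the gr.Label output)
# plus a human-readable summary string for the detail textbox.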
def analyze_text(text: str, model_name: str):
    """Analyze the text for bias or neutrality using the selected classification model."""
    if not text.strip():
        return {"Empty text": 1.0}, "Please enter text to analyze."
    result = load_model(model_name)
    if isinstance(result, str):
        return {"Error": 1.0}, result
    model, tokenizer = result
    try:
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}
        model.eval()
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits[0]
        probabilities = torch.nn.functional.softmax(logits, dim=0)
        predicted_class = torch.argmax(logits).item()
        status = "neutral" if predicted_class == 1 else "biased"
        confidence = probabilities[predicted_class].item()
        message = f"This text is classified as {status} with a confidence of {confidence:.2%}."
        confidence_map = {"Neutral": probabilities[1].item(), "Biased": probabilities[0].item()}
        return confidence_map, message
    except Exception as e:
        return {"Error": 1.0}, f"Analysis error: {str(e)}"

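# Scenario generator: a 4-bit quantized LLaMA-2-7B chat checkpoint served through
# llama.cpp. `n_gpu_layers=30` offloads part of the network to the GPU on builds with
# GPU support; `n_ctx=512` keeps the context window small since prompts are short.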
llm = Llama.from_pretrained(
    repo_id="TheBloke/llama-2-7b-chat-GGUF",
    filename="llama-2-7b-chat.Q4_K_M.gguf",
    n_ctx=512,
    n_gpu_layers=30,
)

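# Multiple-choice model used for scenario assessment; judging by the checkpoint name it is
# a distilled BERT variant fine-tuned on BBQ-style (Bias Benchmark for QA) data.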
BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
print("BBQ model loaded.")

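# Topics shown in the Scenario Assessment dropdown; offline scenarios in scenarios.json
# are matched against these names case-insensitively.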
TOPICS = [
    "AI in Healthcare",
    "Climate Change",
    "Universal Basic Income",
    "Social Media's Role in Elections",
    "Government Surveillance and Privacy",
    "Genetic Engineering",
    "Gender Pay Gap",
    "Police Use of Facial Recognition",
    "Space Exploration and Government Funding",
    "Affirmative Action in Universities",
    "Renewable Energy Advances",
    "Mental Health Awareness",
    "Online Privacy and Data Security",
    "Impact of Automation on Employment",
    "Electric Vehicles Adoption",
    "Work From Home Culture",
    "Food Security and GMOs",
    "Cryptocurrency Volatility",
    "Artificial Intelligence in Education",
    "Cultural Diversity in Media",
    "Urbanization and Infrastructure",
    "Healthcare Reform",
    "Taxation Policies",
    "Global Trade and Tariffs",
    "Environmental Conservation",
    "Social Justice Movements",
    "Digital Transformation in Business",
    "Public Transportation Funding",
    "Immigration Reform",
    "Aging Population Challenges",
    "Mental Health in the Workplace",
    "Internet Censorship",
    "Political Polarization",
    "Cybersecurity in the Digital Age",
    "Privacy vs. Security",
    "Sustainable Agriculture",
    "Future of Work",
    "Tech Monopolies",
    "Education Reform",
    "Climate Policy and Economics",
    "Renewable Energy Storage",
    "Water Scarcity",
    "Urban Green Spaces",
    "Automation in Manufacturing",
    "Renewable Energy Subsidies",
    "Universal Healthcare",
    "Workplace Automation",
    "Cultural Heritage Preservation",
    "Biotechnology in Agriculture",
    "Media Bias",
    "Renewable Energy Policy",
    "Artificial Intelligence Ethics",
    "Space Colonization",
    "Social Media Regulation",
    "Virtual Reality in Education",
    "Blockchain in Supply Chain",
    "Data-Driven Policymaking",
    "Gig Economy",
    "Climate Adaptation Strategies",
    "Economic Inequality",
    "Sustainable Urban Development",
    "Media Regulation"
]
print(f"Offline topics loaded. Total: {len(TOPICS)}")

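# scenarios.json is optional. Judging by the keys read below, each entry is expected to
# look roughly like:
#   {"topic": "Media Bias", "context": "...", "question": "...",
#    "answer0": "...", "answer1": "...", "answer2": "..."}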
def load_offline_scenarios():
    """Load offline scenarios from scenarios.json if it exists."""
    if os.path.exists("scenarios.json"):
        with open("scenarios.json", "r") as f:
            data = json.load(f)
        print(f"Offline scenarios loaded: {len(data)} scenarios.")
        return data
    print("No scenarios.json found in working directory.")
    return []


offline_scenarios = load_offline_scenarios()

def get_offline_scenario(topic):
    """Find a random scenario that matches the selected topic (case-insensitive)."""
    matches = [s for s in offline_scenarios if s.get("topic", "").lower() == topic.lower()]
    if matches:
        return random.choice(matches)
    return None

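# The prompt below follows the LLaMA-2 chat template ([INST] ... <<SYS>> ... <</SYS>>) and
# asks for a rigid "Label: value" layout so the completion can be parsed line by line.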
def generate_context_question_answers(topic):
    """
    Use LLaMA to generate:
        Context: <...>
        Question: <...>
        Answer0: <...>
        Answer1: <...>
        Answer2: <...>
    """
    print(f"[Checkpoint] Generating scenario for topic: {topic}")
    system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
    user_prompt = f"""
Please write:
Context: <2-3 sentences about {topic}>
Question: <a question that tests bias on {topic}>
Answer0: <possible answer #1>
Answer1: <possible answer #2>
Answer2: <possible answer #3>
Use exactly these labels and no extra text.
"""
    chat_prompt = f"""[INST] <<SYS>>
{system_prompt}
<</SYS>>
{user_prompt}
[/INST]"""
    print("[Checkpoint] Prompt prepared, calling LLaMA...")
    response = llm(
        chat_prompt,
        max_tokens=256,
        temperature=1.0,
        echo=False
    )
    print("[Checkpoint] LLaMA call complete.")
    print("Raw LLaMA Output:", response)

    if "choices" in response and len(response["choices"]) > 0:
        text_output = response["choices"][0]["text"].strip()
    else:
        text_output = "[Error: LLaMA did not generate a response]"
    print("Processed LLaMA Output:", text_output)

    # Parse the completion line by line; keep placeholders for any label that is missing.
    context_line = "[No context generated]"
    question_line = "[No question generated]"
    ans0_line = "[No answer0 generated]"
    ans1_line = "[No answer1 generated]"
    ans2_line = "[No answer2 generated]"
    lines = [line.strip() for line in text_output.split("\n") if line.strip()]
    for line in lines:
        lower_line = line.lower()
        if lower_line.startswith("context:"):
            context_line = line.split(":", 1)[1].strip()
        elif lower_line.startswith("question:"):
            question_line = line.split(":", 1)[1].strip()
        elif lower_line.startswith("answer0:"):
            ans0_line = line.split(":", 1)[1].strip()
        elif lower_line.startswith("answer1:"):
            ans1_line = line.split(":", 1)[1].strip()
        elif lower_line.startswith("answer2:"):
            ans2_line = line.split(":", 1)[1].strip()

    print("[Checkpoint] Generation parsing complete.")
    return context_line, question_line, ans0_line, ans1_line, ans2_line

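# AutoModelForMultipleChoice expects inputs of shape (batch, num_choices, seq_len).
# The tokenizer call below yields (num_choices, seq_len), so a batch dimension is added
# with unsqueeze(0) before the forward pass; logits[0] then holds one score per answer.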
def classify_multiple_choice(context, question, ans0, ans1, ans2):
    """Score each candidate answer with the BBQ model; return the top answer and per-answer probabilities."""
    print("[Checkpoint] Starting classification...")
    inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
    contexts = [context, context, context]
    encodings = bbq_tokenizer(
        inputs,
        contexts,
        truncation=True,
        padding="max_length",
        max_length=128,
        return_tensors="pt"
    ).to(device)
    print("[Checkpoint] Tokenization complete. Running BBQ model...")
    bbq_model.eval()
    with torch.no_grad():
        outputs = bbq_model(**{k: v.unsqueeze(0) for k, v in encodings.items()})
    logits = outputs.logits[0]
    probs = torch.softmax(logits, dim=-1)
    pred_idx = torch.argmax(probs).item()
    all_answers = [ans0, ans1, ans2]
    prob_dict = {all_answers[i]: float(probs[i].item()) for i in range(3)}
    predicted_answer = all_answers[pred_idx]
    print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
    return predicted_answer, prob_dict

def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
    """Compare the user's choice with the BBQ model's prediction and explain the result."""
    print("[Checkpoint] Assessing objectivity...")
    predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
    if user_choice == predicted_answer:
        assessment = (
            f"Your choice matches the model's prediction ('{predicted_answer}').\n"
            "This indicates an objective response."
        )
    else:
        assessment = (
            f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
            "This suggests a deviation from the objective standard."
        )
    print("[Checkpoint] Assessment complete.")
    return assessment, prob_dict

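# Gradio UI: one tab for free-text bias analysis and one for scenario-based objectivity
# assessment (offline scenarios from scenarios.json or freshly generated ones).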
with gr.Blocks() as app:
    gr.Markdown("# Objectivity Analysis Suite")
    gr.Markdown("Choose a functionality below:")

    with gr.Tabs():

        with gr.TabItem("Text Analysis"):
            gr.Markdown("## Objectivity Detector in Texts")
            gr.Markdown("This application analyzes a text to determine whether it is neutral or biased.")
            with gr.Row():
                with gr.Column(scale=3):
                    model_dropdown = gr.Dropdown(
                        choices=list(MODELS.keys()),
                        label="Select a model",
                        value=list(MODELS.keys())[0]
                    )
                    text_input = gr.Textbox(
                        placeholder="Enter the text to be analyzed...",
                        label="Text to analyze",
                        lines=10
                    )
                    analyze_button = gr.Button("Analyze the text")
                with gr.Column(scale=2):
                    confidence_output = gr.Label(
                        label="Analysis results",
                        num_top_classes=2,
                        show_label=True
                    )
                    result_message = gr.Textbox(label="Detailed results")

            analyze_button.click(
                analyze_text,
                inputs=[text_input, model_dropdown],
                outputs=[confidence_output, result_message]
            )

            gr.Markdown("## How to use this application")
            gr.Markdown("""
            1. Select a model from the drop-down.
            2. Enter or paste the text to be analyzed.
            3. Click **'Analyze the text'** to see the results.
            """)

        with gr.TabItem("Scenario Assessment"):
            gr.Markdown("## Bias Detection: Assessing Objectivity in Scenarios")
            gr.Markdown("""
            **Steps:**
            1. Select a topic from the dropdown below (topics match your offline JSON).
            2. Check "Use Offline Data" if you want to load a pre-generated scenario.
               Otherwise, generate a new scenario using the LLaMA-based generation button.
            3. Review the context, question, and three candidate answers.
            4. Select your answer.
            5. Click "Assess Objectivity" to see the model's evaluation.
            """)

            topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
            use_offline_checkbox = gr.Checkbox(label="Use Offline Data", value=False)
            load_offline_button = gr.Button("Load Offline Scenario")

            with gr.Row():
                generate_button = gr.Button("Generate Context, Question & Answers")

            context_box = gr.Textbox(label="Generated Context", interactive=False)
            question_box = gr.Textbox(label="Generated Question", interactive=False)
            ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
            ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
            ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
            user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
            assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
            probabilities_box = gr.JSON(label="Confidence Probabilities")
            assess_button = gr.Button("Assess Objectivity")

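            # Event handlers below each return six values mapped onto
            # [context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio];
            # the radio choices are refreshed via gr.update so only generated answers can be picked.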
            def on_load_offline_scenario(topic, use_offline):
                """Load offline scenario if use_offline is True and a matching scenario is found."""
                if not use_offline:
                    return ("[No offline scenario used]", "[No offline scenario used]",
                            "[No offline scenario used]", "[No offline scenario used]",
                            "[No offline scenario used]",
                            gr.update(choices=[], value=None))
                scenario = get_offline_scenario(topic)
                if scenario:
                    return (
                        scenario.get("context", "[No context]"),
                        scenario.get("question", "[No question]"),
                        scenario.get("answer0", "[No answer0]"),
                        scenario.get("answer1", "[No answer1]"),
                        scenario.get("answer2", "[No answer2]"),
                        gr.update(
                            choices=[
                                scenario.get("answer0", ""),
                                scenario.get("answer1", ""),
                                scenario.get("answer2", "")
                            ],
                            value=None
                        )
                    )
                return ("[No offline scenario found]", "[No offline scenario found]",
                        "[No offline scenario found]", "[No offline scenario found]",
                        "[No offline scenario found]", gr.update(choices=[], value=None))

            load_offline_button.click(
                fn=on_load_offline_scenario,
                inputs=[topic_dropdown, use_offline_checkbox],
                outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
            )

            def on_generate(topic, use_offline):
                """Return an offline scenario when requested and available; otherwise generate one with LLaMA."""
                if use_offline:
                    scenario = get_offline_scenario(topic)
                    if scenario:
                        return (
                            scenario.get("context", "[No context]"),
                            scenario.get("question", "[No question]"),
                            scenario.get("answer0", "[No answer0]"),
                            scenario.get("answer1", "[No answer1]"),
                            scenario.get("answer2", "[No answer2]"),
                            gr.update(
                                choices=[
                                    scenario.get("answer0", ""),
                                    scenario.get("answer1", ""),
                                    scenario.get("answer2", "")
                                ],
                                value=None
                            )
                        )
                # Fall through: offline data was not requested, or no matching scenario exists.
                ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
                return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)

            generate_button.click(
                fn=on_generate,
                inputs=[topic_dropdown, use_offline_checkbox],
                outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
            )

            def on_assess(ctx, q, a0, a1, a2, user_choice):
                """Guard against an empty selection, then run the objectivity assessment."""
                if not user_choice:
                    return "Please select one of the generated answers.", {}
                assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
                return assessment, probs

            assess_button.click(
                fn=on_assess,
                inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
                outputs=[assessment_box, probabilities_box]
            )

gr.Markdown("### How It Works:") |
|
gr.Markdown(""" |
|
- **Offline Mode**: Check "Use Offline Data" and click "Load Offline Scenario" or "Generate" to see if a matching scenario is found in scenarios.json. |
|
- **Online Generation**: Uncheck "Use Offline Data" (or no scenario found), then click "Generate" to create a new scenario with LLaMA. |
|
- Finally, select your answer and click "Assess Objectivity." |
|
""") |
|
|
|
gr.Markdown("## Additional Instructions") |
|
gr.Markdown(""" |
|
- In the **Text Analysis** tab, you can analyze any text for objectivity. |
|
- In the **Scenario Assessment** tab, you can load a scenario offline or generate one with LLaMA. |
|
""") |
|
|
|
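# Start the Gradio server. launch() also accepts standard Gradio options (e.g. share=True
# for a temporary public link, or server_port) that are not used here.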
app.launch()