"""Streamlit demo that scores an explanation with two text-classification models.

The page is gated behind a password read from the ``PASSWORD`` environment
variable. Once unlocked, the user picks a predefined example or types their
own input, and the raw outputs of both models are displayed together with
the absolute difference between their top scores.
"""

import hmac
import os

import pandas as pd
import streamlit as st
from transformers import pipeline


@st.cache_resource
def load_models():
    """Build both classification pipelines and cache them across reruns.

    Streamlit re-executes this script from top to bottom on every widget
    interaction; without caching, both pipelines would be rebuilt each time.

    Returns:
        A ``(model1, model2)`` tuple of text-classification pipelines.
    """
    hallucination_model = pipeline(
        "text-classification",
        model="vectara/hallucination_evaluation_model",
    )
    nli_model = pipeline(
        "text-classification",
        model="sileod/deberta-v3-base-tasksource-nli",
    )
    return hallucination_model, nli_model


# Load models
model1, model2 = load_models()

# Predefined examples
examples = {
    'good': {
        'question': "What causes rainbows to appear in the sky?",
        'explanation': "Rainbows appear when sunlight is refracted, dispersed, and reflected inside water droplets in the atmosphere, resulting in a spectrum of light appearing in the sky.",
        'ground_truth': "Correct",
    },
    'bad': {
        'question': "What causes rainbows to appear in the sky?",
        'explanation': "Rainbows happen because light in the sky gets mixed up and sometimes shows colors when it's raining or when there is water around.",
        'ground_truth': "Incorrect",
    },
}


def evaluate_explanation(question, explanation):
    """Run both models on ``explanation`` and return their raw outputs.

    Args:
        question: The question the explanation answers. Accepted for
            interface compatibility only; it is not passed to either model.
        explanation: The explanation text to classify.

    Returns:
        A ``(results1, results2)`` tuple holding whatever ``model1`` and
        ``model2`` return for ``explanation``.
    """
    # NOTE(review): both checkpoints look like premise/hypothesis style
    # models, yet only the explanation is supplied -- confirm whether the
    # question was meant to be passed as the paired text.
    results1 = model1(explanation)
    results2 = model2(explanation)
    return results1, results2


def compare_vectors(v1, v2):
    """Return the absolute difference between the first scores of two results.

    Args:
        v1: Indexable result whose first element has a ``'score'`` entry.
        v2: Indexable result whose first element has a ``'score'`` entry.

    Returns:
        ``abs(v1[0]['score'] - v2[0]['score'])``.
    """
    # NOTE(review): the two scores may belong to different labels, since
    # each model reports its own first prediction -- confirm that a plain
    # score difference is the intended comparison.
    return abs(v1[0]['score'] - v2[0]['score'])


# Title of the application
st.title('Dual Model Evaluation of Explanations')


def check_password():
    """Render the password prompt and record a successful login.

    On success ``st.session_state['password_correct']`` is set to ``True``;
    on failure it is set to ``False`` and error messages are shown below
    the form.
    """

    def password_entered():
        # Read the submitted value from session state rather than from a
        # variable captured by the closure: callbacks run before the script
        # body of the rerun, so only the keyed widget state is guaranteed
        # to reflect what the user just typed.
        supplied = st.session_state.get('password_input', '')
        expected = os.getenv('PASSWORD')
        # An unset PASSWORD can never match; otherwise compare in constant
        # time on bytes so non-ASCII passwords are handled as well.
        st.session_state['password_correct'] = (
            expected is not None
            and hmac.compare_digest(
                supplied.encode('utf-8'), expected.encode('utf-8')
            )
        )

    st.text_input("Enter Password:", type="password", key='password_input')
    submit_button = st.button("Submit", on_click=password_entered)

    if submit_button and not st.session_state.get('password_correct', False):
        # Errors are rendered here, in the normal page flow, instead of
        # inside the callback.
        st.error("Incorrect Password, please try again.")
        st.error("Please enter a valid password to access the demo.")


# Password check
if not st.session_state.get('password_correct', False):
    check_password()
else:
    st.sidebar.success("Password Verified. Proceed with the demo.")

    input_type = st.radio(
        "Choose input type:", ('Use predefined example', 'Enter your own')
    )

    if input_type == 'Use predefined example':
        example_type = st.radio("Select an example type:", ('good', 'bad'))
        selected_example = examples[example_type]
        question = selected_example['question']
        explanation = selected_example['explanation']
        ground_truth = selected_example['ground_truth']
    else:
        question = st.text_input('Enter your question:', '')
        explanation = st.text_input('Enter your explanation:', '')
        ground_truth = st.text_input('Enter ground truth:', '')

    if st.button('Evaluate Explanation'):
        # ground_truth is only checked for presence here; it is not used in
        # the evaluation itself.
        if question and explanation and ground_truth:
            results1, results2 = evaluate_explanation(question, explanation)
            diff = compare_vectors(results1, results2)
            st.write('### Model 1 Results')
            st.write(results1)
            st.write('### Model 2 Results')
            st.write(results2)
            st.write(f'### Score Difference: {diff}')
        else:
            st.error('Please enter a question, explanation, and ground truth to evaluate.')