|
import hmac
import os

import pandas as pd
import streamlit as st

from util.evaluator import evaluator, write_evaluation_commentary
|
|
|
|
|
|
|
def check_password():
    """Render the password prompt and record a successful login.

    Draws a masked text input and a "Submit" button. When the button is
    clicked, the submitted text is compared with the ``PASSWORD`` environment
    variable. On a match ``st.session_state['password_correct']`` is set to
    ``True``; otherwise an error message is shown.

    Returns:
        None. The outcome is communicated through ``st.session_state``.
    """
    input_key = 'password_input'

    def password_entered():
        # Streamlit runs widget callbacks before the script body reruns, so a
        # closure over the text_input return value may still hold the text
        # from an earlier run. The keyed session-state entry is always the
        # value that was actually submitted.
        entered = st.session_state.get(input_key, '')
        expected = os.getenv('PASSWORD')

        # An unset or empty PASSWORD must never match; otherwise an empty
        # submission would unlock the demo.
        # compare_digest avoids leaking the match position through timing;
        # both sides are encoded because it rejects non-ASCII str arguments.
        if expected and hmac.compare_digest(
            entered.encode('utf-8'), expected.encode('utf-8')
        ):
            st.session_state['password_correct'] = True
        else:
            st.error("Incorrect Password, please try again.")

    st.text_input("Enter Password:", type="password", key=input_key)
    submit_button = st.button("Submit", on_click=password_entered)

    # Shown on the rerun that follows a failed submission.
    if submit_button and not st.session_state.get('password_correct', False):
        st.error("Please enter a valid password to access the demo.")
|
|
|
|
|
|
|
def batch_evaluate(uploaded_file):
    """Score every question/explanation pair found in a CSV file.

    Args:
        uploaded_file: A path or file-like object accepted by ``pd.read_csv``.
            The CSV must contain ``question`` and ``explanation`` columns.

    Returns:
        pd.DataFrame: One row per input row, with ``Question`` and
        ``Explanation`` columns plus one column per entry returned by
        ``write_evaluation_commentary`` (named by the entry's ``'Principle'``
        and holding its ``'Score'``). Empty when the CSV has no data rows.

    Raises:
        ValueError: If ``question`` or ``explanation`` is missing from the CSV.
    """
    df = pd.read_csv(uploaded_file)

    # Fail fast with an actionable message before building the evaluator or
    # drawing any progress UI.
    required_columns = ('question', 'explanation')
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise ValueError(
            "CSV is missing required column(s): "
            + ", ".join(missing)
            + ". Expected columns: question, explanation."
        )

    eval_instance = evaluator('gpt4-1106')
    total_rows = len(df)
    results = []

    progress_bar = st.progress(0)

    pairs = zip(df['question'], df['explanation'])
    for index, (question, explanation) in enumerate(pairs, start=1):
        scores = eval_instance(question, explanation)
        commentary_details = write_evaluation_commentary(scores)
        results.append({
            'Question': question,
            'Explanation': explanation,
            **{detail['Principle']: detail['Score'] for detail in commentary_details},
        })

        # index starts at 1, so the bar reaches 1.0 on the final row.
        progress_bar.progress(index / total_rows)

    return pd.DataFrame(results)
|
|
|
|
|
|
|
# Main page heading.
st.title('Batch Evaluation of Explanations')

# Markdown rendered in the sidebar, top to bottom: a welcome blurb followed by
# the five explanation principles.
_SIDEBAR_SECTIONS = (
    """
### Welcome to the Batch Evaluation of Explanations Demo
This application allows you to evaluate the quality of explanations generated for various questions using different language models. You can either use predefined examples or input your own questions and explanations.
""",
    """
### Explanation Principles
When evaluating explanations, consider the following principles mapped to user empowerment and regulatory compliance outcomes:

1. **Factually Correct**: The information should be accurate and relevant to empower users and meet external audit requirements.
2. **Useful**: Explanations should be clear and meaningful, helping users make informed decisions.
3. **Context Specific**: Explanations should be tailored to the context of use, enhancing their relevance and utility.
4. **User Specific**: Explanations should address the needs and preferences of the user, enabling better decision-making.
5. **Provide Pluralism**: Explanations should present diverse perspectives, allowing users to understand different viewpoints and make well-rounded decisions.
""",
)

for _section in _SIDEBAR_SECTIONS:
    st.sidebar.write(_section)
|
|
|
# Gate the demo: the upload/evaluate UI is drawn only after
# st.session_state['password_correct'] has been set.
if not st.session_state.get('password_correct', False):
    check_password()
else:
    st.sidebar.success("Password Verified. Proceed with the demo.")

    st.write("""
### Instructions for Uploading CSV
Please upload a CSV file with the following columns:
- `question`: The question you want evaluated.
- `explanation`: The explanation corresponding to the question.

**Example CSV Format:**
""")

    # Two sample rows illustrating the expected CSV layout.
    example_data = {
        "question": [
            "What causes rainbows to appear in the sky?",
            "Why is the sky blue?"
        ],
        "explanation": [
            "Rainbows appear when sunlight is refracted, dispersed, and reflected inside water droplets in the atmosphere, resulting in a spectrum of light appearing in the sky.",
            "The sky is blue because molecules in the air scatter blue light from the sun more than they scatter red light."
        ]
    }
    example_df = pd.DataFrame(example_data)
    st.dataframe(example_df)

    uploaded_file = st.file_uploader("Upload CSV file with 'question' and 'explanation' columns", type=['csv'])

    if uploaded_file is not None:
        # Identify the upload so results computed for one file are never shown
        # against a different one.
        upload_signature = (uploaded_file.name, uploaded_file.size)

        if st.button('Evaluate Explanations'):
            # Rewind in case the buffer was consumed by an earlier read.
            uploaded_file.seek(0)
            st.session_state['evaluation_results'] = batch_evaluate(uploaded_file)
            st.session_state['evaluation_source'] = upload_signature

        # st.button is True only on the rerun triggered by its own click, and
        # clicking the download button triggers another rerun. Reading the
        # results back from session state keeps the table and the download
        # button on screen instead of discarding the evaluation.
        if st.session_state.get('evaluation_source') == upload_signature:
            result_df = st.session_state['evaluation_results']
            st.write('### Evaluated Results')
            st.dataframe(result_df)

            csv_payload = result_df.to_csv(index=False)
            st.download_button(
                label="Download evaluation results as CSV",
                data=csv_payload,
                file_name='evaluated_results.csv',
                mime='text/csv',
            )
|
|