import os

import pandas as pd
import streamlit as st

# Import evaluation modules
from phoenix_code import phoenix_eval
from ragas_code import ragas_eval
from traditional_metrics_score import RAGEvaluator

# Set page configuration
st.set_page_config(
    page_title="RAG Evaluation Toolkit",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded"
)


# Custom CSS for improved styling: read a stylesheet file and inject it
def local_css(file_name):
    with open(file_name) as f:
        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)


# Function to create a more visually appealing file uploader
def custom_file_uploader():
    st.markdown("""
        <div class="upload-box">
            <div class="upload-icon">📂</div>
            <p><strong>Drag and Drop or Browse Files</strong></p>
            <p>Supports CSV, XLS, XLSX</p>
        </div>
""", unsafe_allow_html=True) uploaded_file = st.file_uploader( "Upload Dataset", type=["csv", "xls", "xlsx"], label_visibility="collapsed" ) return uploaded_file # Main Streamlit App def main(): # Custom CSS for enhanced styling st.markdown(""" """, unsafe_allow_html=True) # App Title st.markdown("


# Main Streamlit App
def main():
    # Custom CSS for enhanced styling
    st.markdown("""
        <style>
        /* app-wide custom styles */
        </style>
    """, unsafe_allow_html=True)

    # App Title
    st.markdown(
        "<h1 style='text-align: center;'>🔍 RAG Evaluation Toolkit</h1>",
", unsafe_allow_html=True) # Sidebar for Configuration st.sidebar.header("📋 Evaluation Configuration") # API Key Input with improved styling st.sidebar.subheader("OpenAI API Key") openai_api_key = st.sidebar.text_input( "Enter your OpenAI API Key", type="password", help="Required for running evaluations" ) # File Upload Section st.markdown("### 📊 Upload Your Dataset") uploaded_file = custom_file_uploader() # Evaluation Type Selection st.sidebar.subheader("🛠 Evaluation Methods") evaluation_methods = { "Phoenix Evaluation": [ "hallucination", "toxicity", "relevance", "Q&A" ], "RAGAS Evaluation": [ "answer_correctness", "answer_relevancy", "faithfulness", "context_precision", "context_recall", "context_relevancy", "answer_similarity" ], "Traditional Metrics": [ "BLEU", "ROUGE-1", "BERT Score", "Perplexity", "Diversity", "Racial Bias" ] } # Multiselect for each evaluation method selected_metrics = {} for method, metrics in evaluation_methods.items(): if st.sidebar.checkbox(method): selected_metrics[method] = st.sidebar.multiselect( f"Select {method} Metrics", metrics ) # Evaluation Button if uploaded_file and openai_api_key and selected_metrics: if st.button("🚀 Run Evaluation"): # Load data file_extension = os.path.splitext(uploaded_file.name)[1] if file_extension.lower() == ".csv": df = pd.read_csv(uploaded_file) elif file_extension.lower() in [".xls", ".xlsx"]: df = pd.read_excel(uploaded_file) # Combine results combined_results = pd.DataFrame() # Progress bar progress_bar = st.progress(0) # Run evaluations with st.spinner("Processing evaluations..."): # Phoenix Evaluation if "Phoenix Evaluation" in selected_metrics: progress_bar.progress(33) phoenix_results = phoenix_eval( selected_metrics.get("Phoenix Evaluation", []), openai_api_key, df.copy() ) combined_results = pd.concat([combined_results, phoenix_results], axis=1) # RAGAS Evaluation if "RAGAS Evaluation" in selected_metrics: progress_bar.progress(66) ragas_results = ragas_eval( selected_metrics.get("RAGAS Evaluation", []), openai_api_key, df.copy() ) combined_results = pd.concat([combined_results, ragas_results], axis=1) # Traditional Metrics Evaluation if "Traditional Metrics" in selected_metrics: progress_bar.progress(100) traditional_results = RAGEvaluator( df=df.copy(), selected_metrics=selected_metrics.get("Traditional Metrics", []) ) combined_results = pd.concat([combined_results, traditional_results], axis=1) # Save results results_filename = "rag_evaluation_results.xlsx" combined_results.to_excel(results_filename, index=False) # Success message and download button st.success("Evaluation Completed Successfully!") # Create download button with improved styling with open(results_filename, "rb") as file: btn = st.download_button( label="📥 Download Evaluation Results", data=file, file_name=results_filename, mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) # Display results preview st.markdown("### 📊 Results Preview") st.dataframe(combined_results) # Run the app if __name__ == "__main__": main()