Spaces:

Oriserve
/

ASR_arena

Running

App Files Files Community

ai-team-ori committed on Dec 6, 2024

Commit

7095a34

1 Parent(s): 69ab4f3

first commit

Browse files

Files changed (14) hide show

.gitignore +7 -0
app.py +774 -0
images/1.png +0 -0
images/10.png +0 -0
images/11.png +0 -0
images/2.png +0 -0
images/3.png +0 -0
images/4.png +0 -0
images/5.png +0 -0
images/6.png +0 -0
images/7.png +0 -0
images/8.png +0 -0
images/9.png +0 -0
requirements.txt +8 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+*.json
+mapping
+*.ipynb
+test.py
+results/
+.notebook/
+__pycache__/

app.py ADDED Viewed

	@@ -0,0 +1,774 @@

+import streamlit as st
+import io
+import base64
+import librosa
+import tempfile
+import os
+import random
+from datetime import timedelta
+import shutil
+import csv
+from audio_recorder_streamlit import audio_recorder
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import numpy as np
+import time
+import re
+import requests
+SAVE_PATH = "results/results.csv"
+TEMP_DIR = "results/audios"
+if not os.path.exists("results"):
+    os.mkdir("results")
+if not os.path.exists(SAVE_PATH):
+    open(SAVE_PATH,"w").close()
+if not os.path.exists(TEMP_DIR):
+    os.mkdir(TEMP_DIR)
+CREATE_TASK_URL = "https://ai-voice-test.voicegenie.ai/task"
+def decode_audio_array(base64_string):
+    bytes_data = base64.b64decode(base64_string)
+    buffer = io.BytesIO(bytes_data)
+    audio_array = np.load(buffer)
+    return audio_array
+def send_task(payload):
+    response = requests.post(CREATE_TASK_URL,json=payload)
+    response = response.json()
+    if payload["task"] == "transcribe_with_fastapi":
+        return response["text"]
+    elif payload["task"] == "fetch_audio":
+        array = response["array"]
+        array = decode_audio_array(array)
+        sampling_rate = response["sample_rate"]
+        filepath = response["filepath"]
+        return array,sampling_rate,filepath
+def convert_seconds_to_timestamp(seconds):
+    time_delta = timedelta(seconds=seconds)
+    return str(time_delta).split('.')[0]
+def transcribe_whisper(model, path):
+    return model.transcribe(path)["text"]
+class ResultWriter:
+    def __init__(self, save_path):
+        self.save_path = save_path
+        self.headers = [
+            'email',
+            'path',
+            'Ori Apex_score', 'Ori Apex XT_score', 'deepgram_score', 'Ori Swift_score', 'Ori Prime_score',
+            'Ori Apex_appearance', 'Ori Apex XT_appearance', 'deepgram_appearance', 'Ori Swift_appearance', 'Ori Prime_appearance',
+            'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
+        ]
+        if not os.path.exists(save_path):
+            with open(save_path, 'w', newline='') as f:
+                writer = csv.DictWriter(f, fieldnames=self.headers)
+                writer.writeheader()
+    def write_result(self,user_email ,audio_path,option_1_duration_info,option_2_duration_info ,winner_model=None, loser_model=None, both_preferred=False, none_preferred=False):
+        result = {
+            'email': user_email,
+            'path': audio_path,
+            'Ori Apex_score': 0, 'Ori Apex XT_score': 0, 'deepgram_score': 0, 'Ori Swift_score': 0, 'Ori Prime_score': 0,
+            'Ori Apex_appearance': 0, 'Ori Apex XT_appearance': 0, 'deepgram_appearance': 0, 'Ori Swift_appearance': 0, 'Ori Prime_appearance': 0,
+            'Ori Apex_duration':0, 'Ori Apex XT_duration':0, 'deepgram_duration':0, 'Ori Swift_duration':0, 'Ori Prime_duration':0,'azure_score':0,'azure_appearance':0,'azure_duration':0
+        }
+        if winner_model:
+            result[f'{winner_model}_appearance'] = 1
+        if loser_model:
+            result[f'{loser_model}_appearance'] = 1
+        if both_preferred:
+            if winner_model:
+                result[f'{winner_model}_score'] = 1
+            if loser_model:
+                result[f'{loser_model}_score'] = 1
+        elif not none_preferred and winner_model:
+            result[f'{winner_model}_score'] = 1
+        if option_1_duration_info and option_1_duration_info[0]:
+            duration_key, duration_value = option_1_duration_info[0]  # Unpack the tuple
+            if duration_key in self.headers:
+                result[duration_key] = float(duration_value)
+        if option_2_duration_info and option_2_duration_info[0]:
+            duration_key, duration_value = option_2_duration_info[0]  # Unpack the tuple
+            if duration_key in self.headers:
+                result[duration_key] = float(duration_value)
+        with open(self.save_path, 'a', newline='\n') as f:
+            writer = csv.DictWriter(f, fieldnames=self.headers)
+            writer.writerow(result)
+# Module-level writer used by the vote callbacks (on_option_*_click) to append
+# rows to SAVE_PATH.
+result_writer = ResultWriter(SAVE_PATH)
+def reset_state():
+    st.session_state.option_1 = ""
+    st.session_state.option_2 = ""
+    st.session_state.transcribed = False
+    st.session_state.choice = ""
+    st.session_state.option_selected = False
+    st.session_state.current_audio_path = None
+    st.session_state.option_1_model_name = None
+    st.session_state.option_2_model_name = None
+    st.session_state.option_1_model_name_state = None
+    st.session_state.option_2_model_name_state = None
+    st.session_state.option_2_response_time = None
+    st.session_state.option_1_response_time = None
+    st.session_state.audio_tab = None
+def process_random_file(audio_file):
+    models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
+    option_1_model_name, option_2_model_name = random.sample(models_list, 2)
+    st.session_state.current_audio_path = audio_file
+    st.session_state.option_1_model_name = option_1_model_name
+    st.session_state.option_2_model_name = option_2_model_name
+    return process_normal_audio(audio_file,option_1_model_name,option_2_model_name,"loaded_models")
+def process_audio_file(audio_file):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp_file:
+        tmp_file.write(audio_file.getvalue())
+        permanent_path = os.path.join(TEMP_DIR, os.path.basename(tmp_file.name))
+        os.makedirs(TEMP_DIR, exist_ok=True)
+        shutil.move(tmp_file.name, permanent_path)
+    st.session_state.current_audio_path = permanent_path
+    models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
+    option_1_model_name, option_2_model_name = random.sample(models_list, 2)
+    st.session_state.option_1_model_name = option_1_model_name
+    st.session_state.option_2_model_name = option_2_model_name
+    return process_normal_audio(permanent_path, option_1_model_name, option_2_model_name, "loaded_models")
+def encode_audio_array(audio_array):
+    buffer = io.BytesIO()
+    np.save(buffer, audio_array)
+    buffer.seek(0)
+    base64_bytes = base64.b64encode(buffer.read())
+    base64_string = base64_bytes.decode('utf-8')
+    return base64_string
+def call_function(model_name,audio_path):
+    if st.session_state.audio_tab:
+        y,_ = librosa.load(audio_path,sr=22050,mono=True)
+        encoded_array = encode_audio_array(y)
+        payload = {
+                "task":"transcribe_with_fastapi",
+                "payload":{
+                    "file_path":encoded_array,
+                    "model_name":model_name,
+                    "audio_b64":True
+                }}
+    else:
+        payload = {
+                "task":"transcribe_with_fastapi",
+                "payload":{
+                    "file_path":audio_path,
+                    "model_name":model_name,
+                    "audio_b64":False
+                }}
+    transcript = send_task(payload)
+    return transcript
+def process_normal_audio(audio_path, model1_name, model2_name, loaded_models):
+    time_1 = time.time()
+    transcript1 = call_function(model1_name,audio_path)
+    time_2 = time.time()
+    transcript2 = call_function(model2_name,audio_path)
+    time_3 = time.time()
+    st.session_state.option_2_response_time = round(time_3 - time_2,3)
+    st.session_state.option_1_response_time = round(time_2 - time_1,3)
+    return transcript1, transcript2
+def process_recorded_audio(audio_bytes):
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
+        tmp_file.write(audio_bytes)
+        permanent_path = os.path.join(TEMP_DIR, f"recorded_{os.path.basename(tmp_file.name)}")
+        os.makedirs(TEMP_DIR, exist_ok=True)
+        shutil.move(tmp_file.name, permanent_path)
+    st.session_state.current_audio_path = permanent_path
+    models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
+    option_1_model_name, option_2_model_name = random.sample(models_list, 2)
+    st.session_state.option_1_model_name = option_1_model_name
+    st.session_state.option_2_model_name = option_2_model_name
+    # loaded_models = load_models()
+    return process_normal_audio(permanent_path, option_1_model_name, option_2_model_name, "loaded_models")
+def get_model_abbreviation(model_name):
+    abbrev_map = {
+        'Ori Apex': 'Ori Apex',
+        'Ori Apex XT': 'Ori Apex XT',
+        'deepgram': 'DG',
+        'Ori Swift': 'Ori Swift',
+        'Ori Prime': 'Ori Prime',
+        'azure' : 'Azure'
+    }
+    return abbrev_map.get(model_name, model_name)
+def calculate_metrics(df):
+    models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
+    metrics = {}
+    for model in models:
+        appearances = df[f'{model}_appearance'].sum()
+        wins = df[f'{model}_score'].sum()
+        durations = df[df[f'{model}_appearance'] == 1][f'{model}_duration']
+        if appearances > 0:
+            win_rate = (wins / appearances) * 100
+            avg_duration = durations.mean()
+            duration_std = durations.std()
+        else:
+            win_rate = 0
+            avg_duration = 0
+            duration_std = 0
+        metrics[model] = {
+            'appearances': appearances,
+            'wins': wins,
+            'win_rate': win_rate,
+            'avg_response_time': avg_duration,
+            'response_time_std': duration_std
+        }
+    return metrics
+def create_win_rate_chart(metrics):
+    models = list(metrics.keys())
+    win_rates = [metrics[model]['win_rate'] for model in models]
+    fig = go.Figure(data=[
+        go.Bar(
+            x=[get_model_abbreviation(model) for model in models],
+            y=win_rates,
+            text=[f'{rate:.1f}%' for rate in win_rates],
+            textposition='auto',
+            hovertext=models
+        )
+    ])
+    fig.update_layout(
+        title='Win Rate by Model',
+        xaxis_title='Model',
+        yaxis_title='Win Rate (%)',
+        yaxis_range=[0, 100]
+    )
+    return fig
+def create_appearance_chart(metrics):
+    models = list(metrics.keys())
+    appearances = [metrics[model]['appearances'] for model in models]
+    fig = px.pie(
+        values=appearances,
+        names=[get_model_abbreviation(model) for model in models],
+        title='Model Appearances Distribution',
+        hover_data=[models]
+    )
+    return fig
+def create_head_to_head_matrix(df):
+    models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
+    matrix = np.zeros((len(models), len(models)))
+    for i, model1 in enumerate(models):
+        for j, model2 in enumerate(models):
+            if i != j:
+                matches = df[
+                    (df[f'{model1}_appearance'] == 1) &
+                    (df[f'{model2}_appearance'] == 1)
+                ]
+                if len(matches) > 0:
+                    win_rate = (matches[f'{model1}_score'].sum() / len(matches)) * 100
+                    matrix[i][j] = win_rate
+    fig = go.Figure(data=go.Heatmap(
+        z=matrix,
+        x=[get_model_abbreviation(model) for model in models],
+        y=[get_model_abbreviation(model) for model in models],
+        text=[[f'{val:.1f}%' if val > 0 else '' for val in row] for row in matrix],
+        texttemplate='%{text}',
+        colorscale='RdYlBu',
+        zmin=0,
+        zmax=100
+    ))
+    fig.update_layout(
+        title='Head-to-Head Win Rates',
+        xaxis_title='Opponent Model',
+        yaxis_title='Model'
+    )
+    return fig
+def create_metric_container(label, value, full_name=None):
+    container = st.container()
+    with container:
+        st.markdown(f"**{label}**")
+        if full_name:
+            st.markdown(f"<h3 style='margin-top: 0;'>{value}</h3>", unsafe_allow_html=True)
+            st.caption(f"Full name: {full_name}")
+        else:
+            st.markdown(f"<h3 style='margin-top: 0;'>{value}</h3>", unsafe_allow_html=True)
+def on_option_1_click():
+    if st.session_state.transcribed and not st.session_state.option_selected:
+        st.session_state.option_1_model_name_state = f"👑 {st.session_state.option_1_model_name} 👑"
+        st.session_state.option_2_model_name_state = f"👎 {st.session_state.option_2_model_name} 👎"
+        st.session_state.choice = f"You chose Option 1. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
+        result_writer.write_result(
+            st.session_state.user_email,
+            st.session_state.current_audio_path,
+            winner_model=st.session_state.option_1_model_name,
+            loser_model=st.session_state.option_2_model_name,
+            option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
+            option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)]
+        )
+        st.session_state.option_selected = True
+def on_option_2_click():
+    if st.session_state.transcribed and not st.session_state.option_selected:
+        st.session_state.option_2_model_name_state = f"👑 {st.session_state.option_2_model_name} 👑"
+        st.session_state.option_1_model_name_state = f"👎 {st.session_state.option_1_model_name} 👎"
+        st.session_state.choice = f"You chose Option 2. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
+        result_writer.write_result(
+            st.session_state.user_email,
+            st.session_state.current_audio_path,
+            winner_model=st.session_state.option_2_model_name,
+            loser_model=st.session_state.option_1_model_name,
+            option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
+            option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)]
+        )
+        st.session_state.option_selected = True
+def on_option_both_click():
+    if st.session_state.transcribed and not st.session_state.option_selected:
+        st.session_state.option_2_model_name_state = f"👑 {st.session_state.option_2_model_name} 👑"
+        st.session_state.option_1_model_name_state = f"👑 {st.session_state.option_1_model_name} 👑"
+        st.session_state.choice = f"You chose Prefer both. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
+        result_writer.write_result(
+            st.session_state.user_email,
+            st.session_state.current_audio_path,
+            winner_model=st.session_state.option_1_model_name,
+            loser_model=st.session_state.option_2_model_name,
+            option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
+            option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)],
+            both_preferred=True
+        )
+        st.session_state.option_selected = True
+def on_option_none_click():
+    if st.session_state.transcribed and not st.session_state.option_selected:
+        st.session_state.option_1_model_name_state = f"👎 {st.session_state.option_1_model_name} 👎"
+        st.session_state.option_2_model_name_state = f"👎 {st.session_state.option_2_model_name} 👎"
+        st.session_state.choice = f"You chose none option. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
+        result_writer.write_result(
+            st.session_state.user_email,
+            st.session_state.current_audio_path,
+            winner_model=st.session_state.option_1_model_name,
+            loser_model=st.session_state.option_2_model_name,
+            option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
+            option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)],
+            none_preferred=True
+        )
+        st.session_state.option_selected = True
+def on_reset_click():
+    st.session_state.choice = ""
+    st.session_state.option_selected = False
+    reset_state()
+def arena():
+    # Arena page: lets a logged-in user pick audio (upload, record, or a
+    # backend-provided sample), transcribes it with two models and shows the
+    # two transcripts side by side with voting buttons.
+    if 'logged_in' not in st.session_state:
+        st.session_state.logged_in = False
+    if st.session_state.logged_in:
+        # load_models()
+        st.title("⚔️ Ori Speech-To-Text Arena ⚔️")
+        # Seed every session-state key the page reads so later attribute
+        # access cannot fail on a fresh session.
+        if 'option_1' not in st.session_state:
+            st.session_state.option_1 = ""
+        if 'option_2' not in st.session_state:
+            st.session_state.option_2 = ""
+        if 'transcribed' not in st.session_state:
+            st.session_state.transcribed = False
+        if 'choice' not in st.session_state:
+            st.session_state.choice = ""
+        if 'option_selected' not in st.session_state:
+            st.session_state.option_selected = False
+        if 'current_file_id' not in st.session_state:
+            st.session_state.current_file_id = None
+        if 'current_audio_path' not in st.session_state:
+            st.session_state.current_audio_path = None
+        if "option_1_model_name" not in st.session_state:
+            st.session_state.option_1_model_name = None
+        if "option_2_model_name" not in st.session_state:
+            st.session_state.option_2_model_name = None
+        if "last_recorded_audio" not in st.session_state:
+            st.session_state.last_recorded_audio = None
+        if "last_random_audio" not in st.session_state:
+            st.session_state.last_random_audio = None
+        if "option_1_model_name_state" not in st.session_state:
+            st.session_state.option_1_model_name_state = None
+        if "option_2_model_name_state" not in st.session_state:
+            st.session_state.option_2_model_name_state = None
+        if "option_1_response_time" not in st.session_state:
+            st.session_state.option_1_response_time = None
+        if "option_2_response_time" not in st.session_state:
+            st.session_state.option_2_response_time = None
+        if "audio_tab" not in st.session_state:
+            st.session_state.audio_tab = None
+        tab2, tab3,tab4 = st.tabs(["Upload Audio", "Record Audio","Random Audio Example"])
+        # --- Upload tab ---
+        with tab2:
+            normal_audio = st.file_uploader("Upload Normal Audio File", type=['wav', 'mp3'], key='normal_audio')
+            if normal_audio:
+                # A file with a different name counts as a new comparison.
+                if st.session_state.get('last_normal_file') != normal_audio.name:
+                    reset_state()
+                    st.session_state.last_normal_file = normal_audio.name
+                    st.session_state.current_file_id = normal_audio.name
+                st.audio(normal_audio)
+                if st.button("Transcribe File"):
+                    reset_state()
+                    st.session_state.choice = ""
+                    st.session_state.option_selected = False
+                    # A truthy audio_tab makes call_function send the audio
+                    # inline (base64) instead of sending a path.
+                    st.session_state.audio_tab = "Upload"
+                    option_1_text, option_2_text = process_audio_file(normal_audio)
+                    st.session_state.option_1 = option_1_text
+                    st.session_state.option_2 = option_2_text
+                    st.session_state.transcribed = True
+        # --- Record tab ---
+        with tab3:
+            audio_bytes = audio_recorder(text="Click 🎙️ to record ((Recording active when icon is red))",pause_threshold=3,icon_size="2x")
+            if audio_bytes and audio_bytes != st.session_state.last_recorded_audio:
+                reset_state()
+                st.session_state.last_recorded_audio = audio_bytes
+                st.session_state.current_file_id = "recorded_audio"
+            # NOTE(review): st.audio is called even when audio_bytes is falsy
+            # (nothing recorded yet) - confirm this renders acceptably.
+            st.audio(audio_bytes, format='audio/wav')
+            if st.button("Transcribe Recorded Audio"):
+                if audio_bytes:
+                    reset_state()
+                    st.session_state.choice = ""
+                    st.session_state.option_selected = False
+                    # Same "Upload" marker as the upload tab: the recording is
+                    # sent inline as well.
+                    st.session_state.audio_tab = "Upload"
+                    option_1_text, option_2_text = process_recorded_audio(audio_bytes)
+                    st.session_state.option_1 = option_1_text
+                    st.session_state.option_2 = option_2_text
+                    st.session_state.transcribed = True
+        # --- Random sample tab ---
+        with tab4:
+            fetch_audio_payload = {
+                "task":"fetch_audio"
+            }
+            # NOTE(review): this request is issued every time this function
+            # body runs, although its result is only stored when the session
+            # keys below are still missing - consider fetching lazily.
+            array,sampling_rate,filepath = send_task(fetch_audio_payload)
+            if "current_random_audio" not in st.session_state:
+                st.session_state.current_random_audio = filepath
+            if "current_array" not in st.session_state:
+                st.session_state.current_array = array
+            if "current_sampling_rate" not in st.session_state:
+                st.session_state.current_sampling_rate = sampling_rate
+            # NOTE(review): duplicate of the "current_random_audio" check above.
+            if "current_random_audio" not in st.session_state:
+                st.session_state.current_random_audio = filepath
+            if st.button("Next File"):
+                reset_state()
+                fetch_audio_payload = {
+                "task":"fetch_audio"
+                }
+                array,sampling_rate,filepath = send_task(fetch_audio_payload)
+                st.session_state.current_random_audio = filepath
+                st.session_state.current_array = array
+                st.session_state.current_sampling_rate = sampling_rate
+                # Clearing last_random_audio makes the check below treat the
+                # new sample as unseen.
+                st.session_state.last_random_audio = None
+            audio = st.session_state.current_random_audio
+            if audio and audio != st.session_state.last_random_audio:
+                st.session_state.choice = ""
+                st.session_state.option_selected = False
+                st.session_state.last_random_audio = audio
+                st.session_state.current_file_id = audio
+            st.audio(data=st.session_state.current_array,
+            sample_rate=st.session_state.current_sampling_rate,
+            format="audio/wav")
+            if st.button("Transcribe Random Audio"):
+                if audio:
+                    # audio_tab is not set here, so unless an earlier action
+                    # left it truthy, call_function sends the backend filepath
+                    # rather than inline audio.
+                    st.session_state.option_selected = False
+                    option_1_text, option_2_text = process_random_file(audio)
+                    st.session_state.option_1 = option_1_text
+                    st.session_state.option_2 = option_2_text
+                    st.session_state.transcribed = True
+        # --- Transcripts, revealed model names and voting buttons ---
+        text_containers = st.columns([1, 1])
+        name_containers = st.columns([1, 1])
+        with text_containers[0]:
+            st.text_area("Option 1", value=st.session_state.option_1, height=300)
+        with text_containers[1]:
+            st.text_area("Option 2", value=st.session_state.option_2, height=300)
+        # Model names are shown only once a vote callback has filled the
+        # *_model_name_state values.
+        with name_containers[0]:
+            if st.session_state.option_1_model_name_state:
+                st.markdown(f"<div style='text-align: center'>{st.session_state.option_1_model_name_state}</div>", unsafe_allow_html=True)
+        with name_containers[1]:
+            if st.session_state.option_2_model_name_state:
+                st.markdown(f"<div style='text-align: center'>{st.session_state.option_2_model_name_state}</div>", unsafe_allow_html=True)
+        c1, c2, c3, c4 = st.columns(4)
+        with c1:
+            st.button("Prefer Option 1",on_click=on_option_1_click)
+        with c2:
+            st.button("Prefer Option 2",on_click=on_option_2_click)
+        with c3:
+            st.button("Prefer Both",on_click=on_option_both_click)
+        with c4:
+            st.button("Prefer None",on_click=on_option_none_click)
+        st.button("Reset Choice",on_click=on_reset_click)
+    else:
+        st.write('You have not entered your email and name yet')
+        st.write('Please Navigate to login page in the dropdown menu')
+def dashboard():
+    # Scoreboard page: reads the results CSV and renders model descriptions,
+    # headline metrics, charts and the raw result table for logged-in users.
+    if 'logged_in' not in st.session_state:
+        st.session_state.logged_in = False
+    if st.session_state.logged_in:
+        st.title('Model Arena Scoreboard')
+        # NOTE(review): calculate_metrics indexes columns by name, so this
+        # depends on SAVE_PATH starting with the CSV header row - confirm the
+        # file can never be empty or header-less here.
+        df = pd.read_csv(SAVE_PATH)
+        metrics = calculate_metrics(df)
+        # Keyed by display label (what get_model_abbreviation returns).
+        MODEL_DESCRIPTIONS = {
+            "Ori Prime": "Foundational, large, and stable.",
+            "Ori Swift": "Lighter and faster than Ori Prime.",
+            "Ori Apex": "The top-performing model, fast and stable.",
+            "Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
+            "DG" : "Deepgram Nova-2 API",
+            "Azure" : "Azure Speech Services API"
+        }
+        st.header('Model Descriptions')
+        cols = st.columns(2)
+        # Alternate the description cards between the two columns.
+        for idx, (model, description) in enumerate(MODEL_DESCRIPTIONS.items()):
+            with cols[idx % 2]:
+                st.markdown(f"""
+                    <div style='padding: 1rem; border: 1px solid #e1e4e8; border-radius: 6px; margin-bottom: 1rem;'>
+                        <h3 style='margin: 0; margin-bottom: 0.5rem;'>{model}</h3>
+                        <p style='margin: 0; color: #6e7681;'>{description}</p>
+                    </div>
+                    """, unsafe_allow_html=True)
+        st.header('Overall Performance')
+        col1, col2, col3= st.columns(3)
+        with col1:
+            # One CSV row is written per vote, so the row count is the match count.
+            create_metric_container("Total Matches", len(df))
+        # "Best" is the model with the highest win rate.
+        best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
+        with col2:
+            create_metric_container(
+                "Best Model",
+                get_model_abbreviation(best_model),
+                full_name=best_model
+            )
+        # "Most Used" is the model with the highest appearance count.
+        most_appearances = max(metrics.items(), key=lambda x: x[1]['appearances'])[0]
+        with col3:
+            create_metric_container(
+                "Most Used",
+                get_model_abbreviation(most_appearances),
+                full_name=most_appearances
+            )
+        st.header('Win Rates')
+        win_rate_chart = create_win_rate_chart(metrics)
+        st.plotly_chart(win_rate_chart, use_container_width=True)
+        st.header('Appearance Distribution')
+        appearance_chart = create_appearance_chart(metrics)
+        st.plotly_chart(appearance_chart, use_container_width=True)
+        st.header('Head-to-Head Analysis')
+        matrix_chart = create_head_to_head_matrix(df)
+        st.plotly_chart(matrix_chart, use_container_width=True)
+        st.header('Detailed Metrics')
+        metrics_df = pd.DataFrame.from_dict(metrics, orient='index')
+        metrics_df['win_rate'] = metrics_df['win_rate'].round(2)
+        # Response-time statistics are computed but not shown in this table.
+        metrics_df.drop(["avg_response_time","response_time_std"],axis=1,inplace=True)
+        # metrics_df['avg_response_time'] = metrics_df['avg_response_time'].round(3)
+        metrics_df.index = [get_model_abbreviation(model) for model in metrics_df.index]
+        st.dataframe(metrics_df)
+        st.header('Full Dataframe')
+        # The audio path, voter email and duration columns are removed before
+        # the raw results are displayed.
+        df = df.drop('path', axis=1)
+        df = df.drop(['Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_duration','email'],axis=1)
+        st.dataframe(df)
+    else:
+        st.write('You have not entered your email and name yet')
+        st.write('Please Navigate to login page in the dropdown menu')
+def help():
+    # Help page: static markdown instructions interleaved with screenshots
+    # loaded from ./images.
+    # NOTE(review): the function name shadows the built-in help(); it is
+    # referenced by this name in page_names_to_funcs.
+    st.title("Help")
+    st.markdown(
+    """
+    # Ori Speech-To-Text Arena
+## Introduction
+Below are the general instructions for participating in the Ori Speech-To-Text Arena.
+## Options:
+There are three options for participating in the Ori Speech-To-Text Arena:
+1. Compare different model by uploading your own audio file and submit it to the Arena
+2. Compare different model by recording your own audio file and submit it to the Arena
+3. Choose and compare from one of our randomly selected audio files
+### 1. Compare different model by uploading your own audio file and submit it to the Arena
+Steps:
+1. Select the upload audio file option
+""")
+    # Screenshots for option 1 (upload).
+    st.image("./images/1.png")
+    st.image("./images/2.png")
+    st.image("./images/3.png")
+    st.image("./images/4.png")
+    st.markdown("""
+### 2. Compare different model by recording your own audio file and submit it to the Arena
+Steps:
+1. Select the record audio file option
+""")
+    # Screenshots for option 2 (record).
+    st.image("./images/5.png")
+    st.image("./images/6.png")
+    st.image("./images/7.png")
+    st.markdown("""
+4. Rest of the steps remain same as above
+### 3. Choose and compare from one of our randomly selected audio files
+Steps:
+1. Select the random audio file option
+""")
+    # Screenshots for option 3 (random sample).
+    st.image("./images/8.png")
+    st.image("./images/9.png")
+    st.markdown("""
+4. Rest of the steps remain same as above
+""")
+    st.image("./images/10.png")
+def validate_email(email):
+    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
+    return re.match(pattern, email) is not None
+def validate_name(name):
+    pattern = r'^[a-zA-Z\s-]{2,}$'
+    return re.match(pattern, name) is not None
+def create_login_page():
+    st.title("Welcome to the App")
+    if 'logged_in' not in st.session_state:
+        st.session_state.logged_in = False
+    if not st.session_state.logged_in:
+        with st.form("login_form"):
+            st.subheader("Please Login")
+            email = st.text_input("Email")
+            name = st.text_input("Name")
+            submit_button = st.form_submit_button("Login")
+            if submit_button:
+                if not email or not name:
+                    st.error("Please fill in all fields")
+                else:
+                    if not validate_email(email):
+                        st.error("Please enter a valid email address")
+                    elif not validate_name(name):
+                        st.error("Please enter a valid name (letters, spaces, and hyphens only)")
+                    else:
+                        st.session_state.logged_in = True
+                        st.session_state.user_email = email
+                        st.session_state.user_name = name
+                        st.success("Login successful! You can now navigate to the Arena using the dropdown in the sidebar")
+    else:
+        st.success("You have already logged in. You can now navigate to the Arena using the dropdown in the sidebar")
+# Maps each sidebar entry to the function that renders that page.
+page_names_to_funcs = {
+    "Login" : create_login_page,
+    "Arena": arena,
+    "Scoreboard": dashboard,
+    "Help": help
+}
+# The sidebar selectbox chooses the page; the selected page function is then
+# called to render it.
+demo_name = st.sidebar.selectbox("Choose a View\nTo view the help page choose the help view", page_names_to_funcs.keys())
+page_names_to_funcs[demo_name]()