# Invisible RAG Pilot Demo App

In [3]:
import json
import gradio as gr


class RAGInterface:
    """
    Gradio app for loading, rating and saving the mockup A/B evaluation RAG tasks.

    Every task is stored as ``./data/<task_id>.json``. Loading a task fills the
    chat history, search results, both answers and any previously saved ratings;
    saving writes the current ratings back into the same json file.

    The app is deployed on Hugging Face spaces at https://huggingface.co/spaces/sukiboo/invisible-rag-demo

    Instantiating the class builds the interface and launches it right away.
    """

    # directory that holds one json file per task
    DATA_DIR = './data'
    # per-answer rating fields, in the order the UI components are wired up
    RATING_FIELDS = ('groundedness', 'fluency', 'utility', 'notes')
    # choices offered by the rating widgets
    QUALITY_CHOICES = ('Bad', 'Good', 'Perfect')
    UTILITY_CHOICES = ('Catastrophic', 'Bad', 'Good', 'Perfect')
    OVERALL_CHOICES = ('#1 Better', 'Equally Bad', 'Equally Good', '#2 Better')

    def __init__(self):
        self.setup_interface()
        self.launch_interface()

    def setup_interface(self):
        """Configure the A/B Evaluation RAG task interface and store it in ``self.interface``."""

        def rating_column():
            """Create one column with the four rating widgets of a single answer."""
            with gr.Column():
                return (
                    gr.Radio(label='Groundedness', choices=list(self.QUALITY_CHOICES)),
                    gr.Radio(label='Fluency', choices=list(self.QUALITY_CHOICES)),
                    gr.Radio(label='Utility', choices=list(self.UTILITY_CHOICES)),
                    gr.Textbox(label='Notes', placeholder='N/A'),
                )

        with gr.Blocks(title='AB Evaluate RAG') as self.interface:

            # protected field: id of the task that is currently loaded, so that
            # saving does not depend on whatever is typed in the visible textbox
            _task_id = gr.Textbox(label='Task ID', interactive=False, visible=False)

            # task id, load button, chat history, search results
            with gr.Row():
                task_id = gr.Textbox(container=False, placeholder='Task ID', scale=9)
                load_button = gr.Button('Load Task', scale=1)
            chat = gr.Chatbot(height=700, layout='bubble', label='Chat History')
            sources = gr.Markdown()

            # model completions for answers 1 and 2
            with gr.Row():
                with gr.Column():
                    answer1 = gr.Textbox(label='Answer 1', max_lines=50)
                with gr.Column():
                    answer2 = gr.Textbox(label='Answer 2', max_lines=50)

            # individual ratings for answers 1 and 2
            with gr.Row():
                ratings1 = rating_column()
                ratings2 = rating_column()

            # overall rating
            overall = gr.Radio(label='Overall Rating', choices=list(self.OVERALL_CHOICES))
            notes = gr.Textbox(label='Notes', placeholder='A brief justification for the overall rating')

            # save button
            save_button = gr.Button('Save Task')

            # input/output fields; the order must match load_ratings/save_task
            answers = (answer1, answer2)
            ratings = (*ratings1, *ratings2, overall, notes)

            # button clicks
            load_button.click(self.load_task, inputs=[task_id], outputs=[_task_id, chat, sources, *answers, *ratings])
            save_button.click(self.save_task, inputs=[_task_id, *ratings], outputs=None)

    def _task_path(self, task_id):
        """
        Return the json path of a task.

        The id comes from a user-editable textbox, so ids that are empty or
        contain a path separator are rejected to keep access inside DATA_DIR.

        Raises:
            ValueError: if the id is empty or contains '/', a backslash or NUL.
        """
        task_id = str(task_id)
        if not task_id or any(char in task_id for char in ('/', '\\', '\0')):
            raise ValueError(f'invalid task id: {task_id!r}')
        return f'{self.DATA_DIR}/{task_id}.json'

    def load_task(self, task_id):
        """
        Load the task and parse the info.

        Returns a flat tuple matching the outputs of the load button:
        stored task id, chat history, search results markdown, both answers
        and the ten rating values.

        Raises:
            gr.Error: if the task file cannot be read or lacks an expected field.
        """
        task = self.read_task(task_id)
        try:
            loaded_id = task['id']
            # the current question and its search query are shown as the last chat turn
            chat = task['chat_history'] + [[task['question'], task['search_query']]]
            answers = [task['answer_1'], task['answer_2']]
            sources = self.load_sources(task)
            ratings = self.load_ratings(task)
        except Exception as err:
            # any malformed task is reported to the user the same way
            raise gr.Error(f'Could not load the task {task_id} :(') from err
        gr.Info(f'Task {task_id} is loaded!')
        return (loaded_id, chat, sources, *answers, *ratings)

    def read_task(self, task_id):
        """
        Read the json task file.

        Raises:
            gr.Error: if the id is invalid, the file is missing, or it cannot be parsed.
        """
        try:
            path = self._task_path(task_id)
        except ValueError as err:
            raise gr.Error(f'Task {task_id} is not found :(') from err
        try:
            # tasks are written as utf-8 by save_task, so read them the same way
            with open(path, encoding='utf-8') as task_file:
                return json.load(task_file)
        except FileNotFoundError as err:
            raise gr.Error(f'Task {task_id} is not found :(') from err
        except (OSError, ValueError) as err:
            # unreadable file, undecodable bytes or malformed json
            raise gr.Error(f'Could not load the task {task_id} :(') from err

    def load_sources(self, task):
        """Parse the search results into a markdown string with one numbered section per source."""
        sections = ['## Search Results']
        for number, source in enumerate(task['search_results'], start=1):
            # escape '<' with a backslash so that it is not rendered as html
            escaped = source.replace('<', '\\<')
            sections.append(f'### {number}. {escaped}')
        # the trailing empty item closes the list with a final separator
        sections.append('')
        return '\n\n---\n\n'.join(sections)

    def load_ratings(self, task):
        """
        Parse the ratings for each answer.

        Returns a flat tuple: the four rating fields of answer 1, the four
        rating fields of answer 2, the overall rating and the overall notes.
        """
        ratings1 = tuple(task['ratings_1'][field] for field in self.RATING_FIELDS)
        ratings2 = tuple(task['ratings_2'][field] for field in self.RATING_FIELDS)
        return (*ratings1, *ratings2, task['overall'], task['notes'])

    def save_task(self, task_id, *ratings):
        """
        Save the ratings back into the task's json file.

        Args:
            task_id: id of the task to update (the hidden field set on load).
            *ratings: exactly ten values in the order produced by load_ratings.

        Raises:
            gr.Error: if the task cannot be read or written.
            ValueError: if the number of ratings is not ten.
        """
        # load the original task; this also reports a missing/unloaded task nicely
        task = self.read_task(task_id)
        # parse the ratings
        (groundedness1, fluency1, utility1, notes1,
         groundedness2, fluency2, utility2, notes2,
         overall, notes) = ratings
        try:
            # update the ratings for answers 1 and 2
            task.setdefault('ratings_1', {}).update(
                zip(self.RATING_FIELDS, (groundedness1, fluency1, utility1, notes1)))
            task.setdefault('ratings_2', {}).update(
                zip(self.RATING_FIELDS, (groundedness2, fluency2, utility2, notes2)))
            # update overall ratings
            task['overall'] = overall
            task['notes'] = notes
            # serialize first so that a failure cannot truncate the existing file
            payload = json.dumps(task, ensure_ascii=False, indent=4)
            with open(self._task_path(task_id), 'w', encoding='utf-8') as task_file:
                task_file.write(payload)
        except Exception as err:
            raise gr.Error(f'Could not save the task {task_id} :(') from err
        gr.Info(f'Task {task_id} is saved!')

    def launch_interface(self):
        """Launch the A/B Evaluation RAG task interface."""
        gr.close_all()
        self.interface.queue(default_concurrency_limit=None)
        self.interface.launch()


# Constructing the class builds the Gradio interface and launches it right away,
# because __init__ calls setup_interface() followed by launch_interface().
rag = RAGInterface()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


In [4]:
# create placeholder tasks
import os
import json

# Seed the data directory with three identical placeholder tasks
# (demo_task_1 .. demo_task_3) so that the demo app has something to load.
os.makedirs('./data/', exist_ok=True)

for idx in range(3):
    task_name = f'demo_task_{idx+1}'
    # unrated tasks carry the literal string 'null' and empty notes
    blank_ratings = {'groundedness': 'null', 'utility': 'null', 'fluency': 'null', 'notes': ''}
    task = dict(
        id=task_name,
        chat_history=[['user message 1', 'bot message 1'], ['user message 2', 'bot message 2']],
        question='question',
        search_query='search query',
        search_results=['source 1', 'source 2', 'source 3'],
        answer_1='answer 1',
        answer_2='answer 2',
        ratings_1=dict(blank_ratings),
        ratings_2=dict(blank_ratings),
        overall='null',
        notes='',
    )
    # serialize with the same settings the app uses when saving a task
    serialized = json.dumps(task, ensure_ascii=False, indent=4)
    with open(f'./data/{task_name}.json', 'w', encoding='utf-8') as task_file:
        task_file.write(serialized)
