Spaces:

Pixeltable
/

Multi-LLM-RAG-with-Groundtruth-Comparison

Sleeping

App Files Files Community

Multi-LLM-RAG-with-Groundtruth-Comparison / app.py

PierreBrunelle

Update app.py

98cccd6 verified 3 months ago

raw

history blame

11.5 kB

	import gradio as gr
	import pandas as pd
	import io
	import base64
	import uuid
	import pixeltable as pxt
	from pixeltable.iterators import DocumentSplitter
	import numpy as np
	from pixeltable.functions.huggingface import sentence_transformer
	from pixeltable.functions import openai
	from pixeltable.functions.fireworks import chat_completions as f_chat_completions
	from pixeltable.functions.mistralai import chat_completions
	from gradio.themes import Monochrome

	import os
	import getpass

	"""## Store OpenAI API Key"""

	if 'OPENAI_API_KEY' not in os.environ:
	os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key:')

	if 'FIREWORKS_API_KEY' not in os.environ:
	os.environ['FIREWORKS_API_KEY'] = getpass.getpass('Fireworks API Key:')

	if 'MISTRAL_API_KEY' not in os.environ:
	os.environ['MISTRAL_API_KEY'] = getpass.getpass('Mistral AI API Key:')

	"""## Creating UDFs: Embedding and Prompt Functions"""

	# Set up embedding function
	@pxt.expr_udf
	def e5_embed(text: str) -> np.ndarray:
	return sentence_transformer(text, model_id='intfloat/e5-large-v2')

	# Create prompt function
	@pxt.udf
	def create_prompt(top_k_list: list[dict], question: str) -> str:
	concat_top_k = '\n\n'.join(
	elt['text'] for elt in reversed(top_k_list)
	)
	return f'''
	PASSAGES:

	{concat_top_k}

	QUESTION:

	{question}'''

	"""Gradio Application"""

	def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, show_question, show_correct_answer, show_gpt4omini, show_llamav3p23b, show_mistralsmall, progress=gr.Progress()):
	# Ensure a clean slate for the demo by removing and recreating the 'rag_demo' directory
	progress(0, desc="Initializing...")

	pxt.drop_dir('rag_demo', force=True)
	pxt.create_dir('rag_demo')

	# Process the ground truth file, which contains questions and correct answers
	# Import as CSV or Excel depending on the file extension
	if ground_truth_file.name.endswith('.csv'):
	queries_t = pxt.io.import_csv('rag_demo.queries', ground_truth_file.name)
	else:
	queries_t = pxt.io.import_excel('rag_demo.queries', ground_truth_file.name)

	progress(0.2, desc="Processing documents...")

	# Create a table to store the uploaded PDF documents
	documents_t = pxt.create_table(
	'rag_demo.documents',
	{'document': pxt.DocumentType()}
	)

	# Insert the PDF files into the documents table
	documents_t.insert({'document': file.name} for file in pdf_files if file.name.endswith('.pdf'))

	# Create a view that splits the documents into smaller chunks
	chunks_t = pxt.create_view(
	'rag_demo.chunks',
	documents_t,
	iterator=DocumentSplitter.create(
	document=documents_t.document,
	separators=chunk_separator,
	limit=chunk_limit if chunk_separator in ["token_limit", "char_limit"] else None
	)
	)

	progress(0.4, desc="Generating embeddings...")

	# Add an embedding index to the chunks for similarity search
	chunks_t.add_embedding_index('text', string_embed=e5_embed)

	# Define a query function to retrieve the top-k most similar chunks for a given question
	@chunks_t.query
	def top_k(query_text: str):
	sim = chunks_t.text.similarity(query_text)
	return (
	chunks_t.order_by(sim, asc=False)
	.select(chunks_t.text, sim=sim)
	.limit(5)
	)

	# Add computed columns to the queries table for context retrieval and prompt creation
	queries_t['question_context'] = chunks_t.top_k(queries_t.question)
	queries_t['prompt'] = create_prompt(
	queries_t.question_context, queries_t.question
	)

	# Prepare messages for the OpenAI API, including system instructions and user prompt
	msgs = [
	{
	'role': 'system',
	'content': 'Read the following passages and answer the question based on their contents.'
	},
	{
	'role': 'user',
	'content': queries_t.prompt
	}
	]

	progress(0.6, desc="Querying models...")

	# Add OpenAI response column
	queries_t['response'] = openai.chat_completions(
	model='gpt-4o-mini-2024-07-18',
	messages=msgs,
	max_tokens=300,
	top_p=0.9,
	temperature=0.7
	)

	# Create a table in Pixeltable and pick a model hosted on Anthropic with some parameters
	queries_t['response_2'] = f_chat_completions(
	messages=msgs,
	model='accounts/fireworks/models/llama-v3p2-3b-instruct',
	# These parameters are optional and can be used to tune model behavior:
	max_tokens=300,
	top_p=0.9,
	temperature=0.7
	)

	queries_t['response_3'] = chat_completions(
	messages=msgs,
	model='mistral-small-latest',
	# These parameters are optional and can be used to tune model behavior:
	max_tokens=300,
	top_p=0.9,
	temperature=0.7
	)

	# Extract the answer text from the API response
	queries_t['gpt4omini'] = queries_t.response.choices[0].message.content
	queries_t['llamav3p23b'] = queries_t.response_2.choices[0].message.content
	queries_t['mistralsmall'] = queries_t.response_3.choices[0].message.content

	# Prepare the output dataframe with selected columns
	columns_to_show = []
	if show_question:
	columns_to_show.append(queries_t.question)
	if show_correct_answer:
	columns_to_show.append(queries_t.correct_answer)
	if show_gpt4omini:
	columns_to_show.append(queries_t.gpt4omini)
	if show_llamav3p23b:
	columns_to_show.append(queries_t.llamav3p23b)
	if show_mistralsmall:
	columns_to_show.append(queries_t.mistralsmall)

	df_output = queries_t.select(*columns_to_show).collect().to_pandas()

	try:
	# Return the output dataframe for display
	return df_output
	except Exception as e:
	return f"An error occurred: {str(e)}", None

	def save_dataframe_as_csv(data):
	print(f"Type of data: {type(data)}")
	if isinstance(data, pd.DataFrame):
	print(f"Shape of DataFrame: {data.shape}")
	if isinstance(data, pd.DataFrame) and not data.empty:
	filename = f"results_{uuid.uuid4().hex[:8]}.csv"
	filepath = os.path.join('tmp', filename)
	os.makedirs('tmp', exist_ok=True)
	data.to_csv(filepath, index=False)
	return filepath
	return None

	# Gradio interface
	with gr.Blocks(theme=Monochrome) as demo:
	gr.Markdown(
	"""
	<div max-width: 800px; margin: 0 auto;">
	<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 200px; margin-bottom: 20px;" />
	<h1 style="margin-bottom: 0.5em;">Multi-LLM RAG Benchmark: Document Q&A with Groundtruth Comparison</h1>
	</div>
	"""
	)
	gr.HTML(
	"""
	<p>
	<a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
	</p>
	"""
	)

	# Add the disclaimer
	gr.Markdown(
	"""
	<div style="background-color: #E5DDD4; border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
	<strong>Disclaimer:</strong> This Gradio app is running on OpenAI, Mistral, and Fireworks accounts with the developer's personal API keys.
	If you wish to use it with your own hardware or API keys, you can
	<a href="https://huggingface.co/spaces/Pixeltable/Multi-LLM-RAG-with-Groundtruth-Comparison?duplicate=true" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">duplicate this Hugging Face Space</a>
	or run it locally or in Google Colab.
	</div>
	"""
	)

	with gr.Row():
	with gr.Column():
	with gr.Accordion("What This Demo Does", open = True):
	gr.Markdown("""
	1. Ingests Documents: Uploads your PDF documents and a ground truth file (CSV or XLSX).
	2. Process and Retrieve Data: Store, chunk, index, orchestrate, and retrieve all data.
	4. Generates Answers: Leverages OpenAI to produce accurate answers based on the retrieved context.
	5. Compares Results: Displays the generated answers alongside the ground truth for easy evaluation.
	""")
	with gr.Column():
	with gr.Accordion("How to Use", open = True):
	gr.Markdown("""
	1. Upload your ground truth file (CSV or XLSX) with the following two columns: question and correct_answer.
	2. Upload one or more PDF documents that contain the information to answer these questions.
	3. Click "Process Files and Generate Output" to start the RAG process.
	4. View the results in the table below, comparing AI-generated answers to the ground truth.
	""")

	# File upload components for ground truth and PDF documents
	with gr.Row():
	ground_truth_file = gr.File(label="Upload Ground Truth (CSV or XLSX) - Format to respect:question \| correct_answer", file_count="single")
	pdf_files = gr.File(label="Upload PDF Documents", file_count="multiple")

	# Add controls for chunking parameters
	with gr.Row():
	chunk_limit = gr.Slider(minimum=100, maximum=500, value=300, step=5, label="Chunk Size Limit (only used when the separator is token_/char_limit)")
	chunk_separator = gr.Dropdown(
	choices=["token_limit", "char_limit", "sentence", "paragraph", "heading"],
	value="token_limit",
	label="Chunk Separator"
	)

	with gr.Row():
	show_question = gr.Checkbox(label="Show Question", value=True)
	show_correct_answer = gr.Checkbox(label="Show Correct Answer", value=True)
	show_gpt4omini = gr.Checkbox(label="Show GPT-4o-mini Answer", value=True)
	show_llamav3p23b = gr.Checkbox(label="Show LLaMA-v3-2-3B Answer", value=True)
	show_mistralsmall = gr.Checkbox(label="Show Mistral-Small Answer", value=True)

	# Button to trigger file processing
	process_button = gr.Button("Process Files and Generate Outputs")

	# Output component to display the results
	df_output = gr.DataFrame(label="Pixeltable Table",
	wrap=True
	)

	with gr.Row():
	with gr.Column(scale=1):
	download_button = gr.Button("Download Results as CSV")
	with gr.Column(scale=2):
	csv_output = gr.File(label="CSV Download")

	def trigger_download(data):
	csv_path = save_dataframe_as_csv(data)
	return csv_path if csv_path else None

	process_button.click(process_files,
	inputs=[ground_truth_file,
	pdf_files,
	chunk_limit,
	chunk_separator,
	show_question,
	show_correct_answer,
	show_gpt4omini,
	show_llamav3p23b,
	show_mistralsmall],
	outputs=df_output)

	download_button.click(
	trigger_download,
	inputs=[df_output],
	outputs=[csv_output]
	)

	if __name__ == "__main__":
	demo.launch(debug=True)