Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	File size: 6,134 Bytes
			
			a9ad3d8 d51ad5a a9ad3d8 d51ad5a a9ad3d8 d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a a9ad3d8 3e327a8 d51ad5a baf3f32 d51ad5a a9ad3d8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 3e327a8 d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a 58661d4 d51ad5a 58661d4 d51ad5a a9ad3d8 d51ad5a 6ef497e a9ad3d8 d51ad5a 6ef497e d51ad5a 6ef497e d51ad5a 6ef497e a9ad3d8 d51ad5a  | 
								1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164  | 
								import gradio as gr
import os
import pandas as pd
import tempfile
import nest_asyncio
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex
from llama_parse import LlamaParse
from llama_index.core.node_parser import MarkdownElementNodeParser
# Apply nest_asyncio to handle async operations in Gradio
nest_asyncio.apply()
def get_sheet_names(file):
    """
    Reads an uploaded Excel file and returns its sheet names.
    This function is triggered when a file is uploaded.
    """
    if file is None:
        # No file, so return an empty, non-interactive dropdown
        return gr.Dropdown(choices=[], interactive=False)
    try:
        # Use pandas to quickly get sheet names without loading all data
        xls = pd.ExcelFile(file.name)
        sheet_names = xls.sheet_names
        # Return an updated, interactive dropdown with the sheet names
        return gr.Dropdown(choices=sheet_names, value=sheet_names[0], interactive=True)
    except Exception as e:
        gr.Warning(f"Could not read Excel file: {e}")
        return gr.Dropdown(choices=[], interactive=False)
def process_and_query(openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text):
    """
    Processes a selected sheet from an uploaded Excel file and answers a user's query.
    Args:
        openai_api_key (str): The user's OpenAI API key.
        llama_parse_api_key (str): The user's LlamaParse API key.
        uploaded_file (gradio.File): The uploaded Excel file object.
        selected_sheet (str): The name of the sheet to process.
        query_text (str): The question to ask about the document.
    Returns:
        str: The answer to the query or an error message.
    """
    if not all([openai_api_key, llama_parse_api_key, uploaded_file, selected_sheet, query_text]):
        return "Error: Please provide all inputs - both API keys, a file, a selected sheet, and a query."
    # Create a temporary file to store the selected sheet's data
    temp_file = None
    try:
        os.environ["OPENAI_API_KEY"] = openai_api_key
        llm = OpenAI(model="gpt-4o-mini", api_key=openai_api_key)
        # Read the selected sheet using pandas
        df = pd.read_excel(uploaded_file.name, sheet_name=selected_sheet)
        
        # Save the sheet's data to a temporary CSV file for LlamaParse
        with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix=".csv", encoding='utf-8') as temp_file:
            df.to_csv(temp_file.name, index=False)
            temp_file_path = temp_file.name
        # Initialize LlamaParse
        parser = LlamaParse(
            api_key=llama_parse_api_key,
            result_type="markdown",
            verbose=True
        )
        
        # Load data from the temporary file containing only the selected sheet
        documents = parser.load_data(temp_file_path)
        
        # Initialize the parser and process the documents
        node_parser = MarkdownElementNodeParser(llm=llm, num_workers=4)
        nodes = node_parser.get_nodes_from_documents(documents)
        base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
        
        # Create the index and query engine
        recursive_index = VectorStoreIndex(nodes=base_nodes + objects, llm=llm)
        query_engine = recursive_index.as_query_engine(similarity_top_k=5, llm=llm)
        
        # Execute the query
        response = query_engine.query(query_text)
        
        return str(response)
    except Exception as e:
        return f"An error occurred: {e}"
    finally:
        # Clean up the temporary file
        if temp_file and os.path.exists(temp_file.name):
            os.unlink(temp_file.name)
# --- Gradio Interface ---
with gr.Blocks(
    title="Excel Sheet Q&A",
    theme=gr.themes.Soft(), # Keeping the Soft theme as a base
    css="""
    .gradio-container {
        background: linear-gradient(to right, #e0eafc, #cfdef3); /* Softer, light blue gradient */
        color: #333333; /* Darker text for contrast */
    }
    .gr-textbox, .gr-dropdown, .gr-file {
        background-color: #ffffff; /* White backgrounds for input fields */
        border: 1px solid #c0c0c0; /* Subtle border */
        color: #333333;
    }
    .gr-button.primary {
        background-color: #4a90e2; /* A pleasant blue for the primary button */
        color: white;
        border: none;
    }
    .gr-button.primary:hover {
        background-color: #357ABD; /* Slightly darker on hover */
    }
    .gradio-container h1, .gradio-container h2, .gradio-container h3 {
        color: #2c3e50; /* Darker headings */
    }
    .gradio-container label {
        color: #555555; /* Slightly softer label text */
    }
    """
) as iface:
    gr.Markdown(
        """
        #  Excel Sheet Q&A
        
        1.  Enter your API keys.
        2.  Upload an Excel file.
        3.  Choose a specific sheet from the dropdown.
        4.  Ask a question about the data in that sheet.
        """
    )
    
    with gr.Row():
        with gr.Column(scale=1):
            openai_key_input = gr.Textbox(label="OpenAI API Key", type="password")
            llamaparse_key_input = gr.Textbox(label="LlamaParse API Key", type="password")
            file_uploader = gr.File(label="Upload Excel Databook", file_types=[".xlsx", ".xls"])
            sheet_selector = gr.Dropdown(label="Choose a Sheet", interactive=False)
        
        with gr.Column(scale=2):
            query_input = gr.Textbox(label="Your Question", placeholder="e.g., What were the total revenues in 2022?", lines=8)
            submit_button = gr.Button("Ask Question", variant="primary")
            output_display = gr.Markdown(label="Answer")
    
    # Define the interactive workflow
    file_uploader.upload(
        fn=get_sheet_names,
        inputs=[file_uploader],
        outputs=[sheet_selector]
    )
    
    submit_button.click(
        fn=process_and_query,
        inputs=[openai_key_input, llamaparse_key_input, file_uploader, sheet_selector, query_input],
        outputs=output_display
    )
if __name__ == "__main__":
    iface.launch(share=True)
 |