import streamlit as st
import pandas as pd
import os
import glob

# Cache the loading of specialties and state files for efficiency
@st.cache_data
def load_specialties(csv_file='Provider-Specialty.csv'):
    """Read the provider specialty table from a CSV file.

    The result is cached with ``st.cache_data`` (Streamlit's cache for
    serialisable data such as DataFrames), so the file is parsed once per
    distinct ``csv_file`` and every caller receives its own copy instead of
    one shared mutable object.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        The parsed file as a pandas DataFrame.
    """
    return pd.read_csv(csv_file)

@st.cache_data
def find_state_files():
    """List the CSV files in the working directory named with a two-character stem.

    The stem is the file name without its final extension, so ``MN.csv``
    qualifies while ``MN.old.csv`` and ``Provider-Specialty.csv`` do not.
    Paths are returned sorted because ``glob`` gives no ordering guarantee.
    The listing is cached, so files added or removed later are not reflected
    until the cache is cleared.

    Returns:
        A sorted list of paths of the form ``./XX.csv``.
    """
    return sorted(
        path
        for path in glob.glob('./*.csv')
        if len(os.path.splitext(os.path.basename(path))[0]) == 2
    )

# Load the provider specialty dataset
specialties = load_specialties()

# User interface for specialty selection
st.title('Provider Specialty Analyzer 📊')

# Markdown outline with emojis for specialty fields
st.markdown('''
## Specialty Fields Description 📝
- **Code**: Unique identifier for the specialty 🆔
- **Grouping**: General category of the specialty 🏷️
- **Classification**: Specific type of practice within the grouping 🎯
- **Specialization**: Further refinement of the classification if applicable 🔍
- **Definition**: Brief description of the specialty 📖
- **Notes**: Additional information or updates about the specialty 🗒️
- **Display Name**: Common name of the specialty 🏷️
- **Section**: Indicates the section of healthcare it belongs to 📚
''')

# Dropdown for selecting a specialty
specialty_options = specialties['Display Name'].unique()
selected_specialty = st.selectbox('Select a Specialty 🩺', options=specialty_options)

# Display specialties matching the selected option or search keyword.
# A non-empty keyword takes precedence over the dropdown selection.
search_keyword = st.text_input('Or search for a keyword in specialties 🔍')
if search_keyword:
    # Case-insensitive substring search over every column, rendered as text.
    # regex=False makes the keyword literal, so input such as "(" or "C++"
    # cannot raise a regex compilation error.
    keyword_hits = specialties.astype(str).apply(
        lambda column: column.str.contains(search_keyword, case=False, regex=False)
    )
    filtered_specialties = specialties[keyword_hits.any(axis=1)]
else:
    filtered_specialties = specialties[specialties['Display Name'] == selected_specialty]

st.dataframe(filtered_specialties)

# State picker built from the discovered state files; MN is preselected
# when it is available (handy for testing), otherwise the first entry is.
state_files = find_state_files()
state_options = sorted(os.path.basename(path).split('.')[0] for path in state_files)
if 'MN' in state_options:
    default_state_index = state_options.index('MN')
else:
    default_state_index = 0
selected_state = st.selectbox(
    'Select a State (optional) 🗺️',
    options=state_options,
    index=default_state_index,
)
# When ticked, the analysis is limited to the state chosen above
use_specific_state = st.checkbox('Filter by selected state only? ✅', value=True)

# Function to process state files and match taxonomy codes
def process_files(specialty_codes, specific_state='MN'):
    """Collect the rows of the state files whose 48th column equals a given code.

    Which files are scanned depends on the module-level ``use_specific_state``
    flag: when it is true only ``./<specific_state>.csv`` is read, otherwise
    every path in the module-level ``state_files`` list is read. Files are
    parsed without a header row.

    Files that are missing, empty, or have fewer than 48 columns are skipped
    instead of aborting the whole scan.

    Args:
        specialty_codes: Iterable of codes to look for.
        specific_state: File stem to scan when ``use_specific_state`` is true.

    Returns:
        A list of ``(state, DataFrame)`` tuples, one per (file, code) pair
        that has at least one matching row, in file order then code order.
        ``state`` is the file name without its ``.csv`` suffix.
    """
    code_column = 47  # zero-based label of the 48th column
    targets = [f'./{specific_state}.csv'] if use_specific_state else state_files

    results = []
    for file in targets:
        try:
            state_df = pd.read_csv(file, header=None)  # Assume no header for simplicity
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # The file list may be stale, or the file may hold no data at all;
            # either way there is nothing to match in it.
            continue
        if code_column not in state_df.columns:
            # Too narrow to contain the code column, so not a provider file.
            continue

        state_name = os.path.basename(file).replace('.csv', '')
        code_values = state_df[code_column]
        for code in specialty_codes:
            filtered_df = state_df[code_values == code]
            if not filtered_df.empty:
                results.append((state_name, filtered_df))

    return results

# Button to initiate analysis
if st.button('Analyze Text Files for Selected Specialty 🔍'):
    if use_specific_state and selected_state is None:
        # The state selectbox had no options (no state file was found), so
        # there is no file to read; report that instead of trying to open one.
        st.warning('No state file is available to analyze.')
    else:
        # Missing codes can never equal a value in a file, so drop them up front.
        specialty_codes = filtered_specialties['Code'].dropna().unique()
        state_data = process_files(specialty_codes, selected_state if use_specific_state else 'MN')

        # The displayed specialties come from the keyword search whenever a
        # keyword was entered, so the heading must name the keyword rather
        # than the dropdown selection in that case.
        if search_keyword:
            criterion = f"matching '{search_keyword}'"
        else:
            criterion = f"with Specialty '{selected_specialty}'"

        if state_data:
            for state, df in state_data:
                st.subheader(f"Providers in {state} {criterion}:")
                st.dataframe(df)
        else:
            st.write("No matching records found in text files for the selected specialty.")