import streamlit as st
import pandas as pd
import os
import glob

# Cache the loaded specialties so the CSV is not re-read on every script rerun
@st.cache_data
def load_specialties(csv_file='Provider-Specialty.csv'):
    """Read the provider-specialty reference table from ``csv_file``.

    ``st.cache_data`` is used instead of ``st.cache_resource`` because the
    return value is plain data rather than a shared resource: every caller
    receives its own copy of the cached DataFrame, so an in-place edit made
    during one rerun or session cannot leak into another.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv(csv_file)``.
    """
    return pd.read_csv(csv_file)

# Cache the finding of state files to avoid repeated file system access
@st.cache_data
def find_state_files():
    """List the CSV files in the working directory named after a two-character code.

    A file qualifies when its base name, with only the final extension
    removed, is exactly two characters long (``MN.csv`` qualifies;
    ``MN.old.csv`` and ``Provider-Specialty.csv`` do not).

    Returns:
        The matching paths as returned by ``glob.glob('./*.csv')``, sorted so
        that the order does not depend on the file system's listing order.
    """
    return sorted(
        path
        for path in glob.glob('./*.csv')
        # splitext strips only the last extension, so a name with extra dots
        # is not mistaken for a two-character state file.
        if len(os.path.splitext(os.path.basename(path))[0]) == 2
    )

specialties = load_specialties()

# UI for specialty selection with an engaging title
st.title('Provider Specialty Analyzer with Code Grouping and Classification 📊')

st.markdown('''
## Specialty Fields Description 📝
- **Code**: Unique identifier for the specialty 🆔
- **Grouping**: General category of the specialty 🏷️
- **Classification**: Specific type of practice within the grouping 🎯
- **Specialization**: Further refinement of the classification if applicable 🔍
- **Definition**: Brief description of the specialty 📖
- **Notes**: Additional information or updates about the specialty 🗒️
- **Display Name**: Common name of the specialty 🏷️
- **Section**: Indicates the section of healthcare it belongs to 📚
''')

# Allows users to select or search for a specialty
specialty_options = specialties['Display Name'].unique()
selected_specialty = st.selectbox('Select a Specialty 🩺', options=specialty_options)

# Keyword search functionality: a non-empty keyword takes precedence over the
# selectbox choice and is matched against every column of the table.
search_keyword = st.text_input('Or search for a keyword in specialties 🔍')
if search_keyword:
    # Compare against the text form of every cell, with missing cells blanked
    # so that NaN is not searched as the literal text "nan".
    searchable_text = specialties.astype(str).where(specialties.notna(), '')
    # regex=False: the keyword is matched literally, so characters such as
    # "(" or "+" neither raise a regex error nor act as wildcards.
    keyword_hits = searchable_text.apply(
        lambda column: column.str.contains(search_keyword, case=False, regex=False)
    ).any(axis=1)
    filtered_specialties = specialties[keyword_hits]
else:
    filtered_specialties = specialties[specialties['Display Name'] == selected_specialty]

st.dataframe(filtered_specialties[['Code', 'Grouping', 'Classification', 'Specialization', 'Definition']])

# State selection UI with default selection for testing
state_files = find_state_files()
state_options = sorted([os.path.basename(file).split('.')[0] for file in state_files])
# Preselect 'MN' when it is available, otherwise the first option
default_state_index = state_options.index('MN') if 'MN' in state_options else 0
selected_state = st.selectbox(
    'Select a State (optional) 🗺️',
    options=state_options,
    index=default_state_index,
)

# Checkbox to filter by selected state only
use_specific_state = st.checkbox('Filter by selected state only? ✅', value=True)

# Process files based on specialty codes and state selection
def process_files(specialty_codes, specific_state='MN', specialty_column=47):
    """Collect the rows of the state files whose specialty code is in ``specialty_codes``.

    Which files are read depends on the module-level ``use_specific_state``
    flag: when it is truthy and ``specific_state`` is not ``None``, only
    ``./<specific_state>.csv`` is read; otherwise every path in the
    module-level ``state_files`` list is read. Files are parsed without a
    header row, so columns are addressed by integer position.

    Args:
        specialty_codes: Iterable of specialty codes to look for.
        specific_state: File stem of the single state file to read, or
            ``None`` to fall back to all discovered state files.
        specialty_column: Zero-based index of the column holding the
            specialty code (default 47, i.e. the 48th column).

    Returns:
        A list of ``(state_name, info, rows)`` tuples, one per (file, code)
        pair that has at least one matching row. ``state_name`` is the file
        name without ``.csv``, ``info`` is a dict with the ``Code``,
        ``Grouping`` and ``Classification`` of the first ``specialties`` row
        for that code, and ``rows`` is the DataFrame of matching rows.
    """
    # A missing state (e.g. no state files were found, so nothing could be
    # selected) must not be turned into the bogus path './None.csv'.
    if use_specific_state and specific_state is not None:
        files_to_scan = [f'./{specific_state}.csv']
    else:
        files_to_scan = state_files

    results = []
    for file in files_to_scan:
        state_df = pd.read_csv(file, header=None)  # Assuming no header for simplicity
        if specialty_column not in state_df.columns:
            # The file is too narrow to contain the specialty column; report
            # it and move on instead of failing with a bare KeyError.
            st.warning(f"Skipping {file}: it has no column at index {specialty_column}.")
            continue

        state_name = os.path.basename(file).replace('.csv', '')
        for code in specialty_codes:
            # Plain equality: a missing (NaN) code never matches blank cells.
            filtered_df = state_df[state_df[specialty_column] == code]
            if filtered_df.empty:
                continue
            # Enhance the display to include 'Code', 'Grouping', and 'Classification' information
            display_info = (
                specialties[specialties['Code'] == code][['Code', 'Grouping', 'Classification']]
                .iloc[0]
                .to_dict()
            )
            results.append((state_name, display_info, filtered_df))

    return results

# Run the analysis only on the rerun triggered by pressing the button
if st.button('Analyze Text Files for Selected Specialty 🔍'):
    specialty_codes = filtered_specialties['Code'].tolist()

    # Pass the chosen state only when the "selected state only" box is ticked
    if use_specific_state:
        state_argument = selected_state
    else:
        state_argument = None
    state_data = process_files(specialty_codes, state_argument)

    if not state_data:
        st.write("No matching records found in text files for the selected specialties.")
    else:
        # The heading names the keyword when one was typed, else the selected specialty
        search_label = search_keyword or selected_specialty
        for state_name, specialty_info, provider_rows in state_data:
            st.subheader(f"Providers in {state_name} with Specialties related to '{search_label}':")
            st.markdown(
                f"**Code**: {specialty_info['Code']}, "
                f"**Grouping**: {specialty_info['Grouping']}, "
                f"**Classification**: {specialty_info['Classification']}"
            )
            st.dataframe(provider_rows)