# Streamlit app: browse the provider specialty table, then look up rows in
# per-state CSV files whose 48th column matches the chosen specialty codes.
import glob
import os

import pandas as pd
import streamlit as st

# Zero-based position of the state-file column that is compared against the
# specialty 'Code' values (i.e. the 48th column of a header-less file).
TAXONOMY_CODE_COLUMN = 47


# Cache the loading of specialties and state files for efficiency.
# st.cache_data is the cache intended for serializable data such as DataFrames
# and lists: every rerun gets its own copy, so accidental mutation cannot leak
# into the cached value.
@st.cache_data
def load_specialties(csv_file='Provider-Specialty.csv'):
    """Read the provider specialty table from *csv_file*.

    Args:
        csv_file: Path of the CSV file to read with ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``. The rest of the script reads its
        ``'Display Name'`` and ``'Code'`` columns.

    Raises:
        Whatever ``pandas.read_csv`` raises, e.g. ``FileNotFoundError`` when
        the file is missing.
    """
    return pd.read_csv(csv_file)


@st.cache_data
def find_state_files():
    """Return the ``./*.csv`` paths whose base name before the first dot has two characters.

    Returns:
        A list of paths as produced by ``glob.glob`` (for example ``./MN.csv``).
    """
    return [
        file
        for file in glob.glob('./*.csv')
        if len(os.path.basename(file).split('.')[0]) == 2
    ]


def filter_by_keyword(frame, keyword):
    """Return the rows of *frame* in which any cell contains *keyword*.

    Cells are compared as text, case-insensitively. The keyword is matched
    literally (``regex=False``) so that user input such as ``(`` or ``[`` is
    not interpreted as a regular expression and cannot raise ``re.error``.

    Args:
        frame: DataFrame to search.
        keyword: Text typed by the user.

    Returns:
        The matching subset of *frame*, in the original row order.
    """
    as_text = frame.astype(str)
    matches = as_text.apply(
        lambda column: column.str.contains(keyword, case=False, regex=False)
    ).any(axis=1)
    return frame[matches]


# Load the provider specialty dataset
specialties = load_specialties()

# User interface for specialty selection
st.title('Provider Specialty Analyzer 📊')

# Markdown outline with emojis for specialty fields
st.markdown('''
## Specialty Fields Description 📝
- **Code**: Unique identifier for the specialty 🆔
- **Grouping**: General category of the specialty 🏷️
- **Classification**: Specific type of practice within the grouping 🎯
- **Specialization**: Further refinement of the classification if applicable 🔍
- **Definition**: Brief description of the specialty 📖
- **Notes**: Additional information or updates about the specialty 🗒️
- **Display Name**: Common name of the specialty 🏷️
- **Section**: Indicates the section of healthcare it belongs to 📚
''')

# Dropdown for selecting a specialty
specialty_options = specialties['Display Name'].unique()
selected_specialty = st.selectbox('Select a Specialty 🩺', options=specialty_options)

# Display specialties matching the selected option or search keyword.
# A non-blank keyword takes precedence over the dropdown selection.
search_keyword = st.text_input('Or search for a keyword in specialties 🔍').strip()
if search_keyword:
    filtered_specialties = filter_by_keyword(specialties, search_keyword)
else:
    filtered_specialties = specialties[specialties['Display Name'] == selected_specialty]
st.dataframe(filtered_specialties)

# State selection UI with MN as the default option for testing
state_files = find_state_files()
state_options = sorted(os.path.basename(file).split('.')[0] for file in state_files)
selected_state = st.selectbox(
    'Select a State (optional) 🗺️',
    options=state_options,
    index=state_options.index('MN') if 'MN' in state_options else 0,
)
use_specific_state = st.checkbox('Filter by selected state only? ✅', value=True)


# Function to process state files and match taxonomy codes
def process_files(specialty_codes, specific_state='MN'):
    """Collect rows from the state files whose 48th column matches a specialty code.

    Reads the module-level ``use_specific_state`` flag: when it is set, only
    ``./{specific_state}.csv`` is read; otherwise every path in the
    module-level ``state_files`` list is read.

    Args:
        specialty_codes: Iterable of codes to look for. Missing values (NaN)
            are ignored so they never match empty cells.
        specific_state: Base name of the state file used when filtering by a
            single state.

    Returns:
        A list of ``(state, DataFrame)`` tuples, one per file that contains at
        least one matching row. ``state`` is the file's base name without
        ``.csv``. Files that are missing, empty, or have fewer than 48
        columns are skipped with a warning shown in the app.
    """
    wanted_codes = [code for code in specialty_codes if pd.notna(code)]
    files = [f'./{specific_state}.csv'] if use_specific_state else state_files

    results = []
    for file in files:
        try:
            state_df = pd.read_csv(file, header=None)  # Assume no header for simplicity
        except (FileNotFoundError, pd.errors.EmptyDataError):
            st.warning(f"Could not read state file '{file}'; skipping it.")
            continue

        if TAXONOMY_CODE_COLUMN not in state_df.columns:
            st.warning(f"State file '{file}' has fewer than 48 columns; skipping it.")
            continue

        # One filter per file: all requested codes end up in a single table
        # instead of one identically-titled table per code.
        matching_rows = state_df[state_df[TAXONOMY_CODE_COLUMN].isin(wanted_codes)]
        if not matching_rows.empty:
            results.append((os.path.basename(file).replace('.csv', ''), matching_rows))
    return results


# Button to initiate analysis
if st.button('Analyze Text Files for Selected Specialty 🔍'):
    if not state_files:
        # Without any state file there is nothing to read (selected_state has
        # no valid value in that case).
        st.warning('No two-letter state CSV files were found in the working directory.')
    else:
        specialty_codes = filtered_specialties['Code'].unique()
        state_data = process_files(
            specialty_codes,
            selected_state if use_specific_state else 'MN',
        )
        if state_data:
            # Describe the criterion that actually produced filtered_specialties.
            if search_keyword:
                criterion = f"matching keyword '{search_keyword}'"
            else:
                criterion = f"with Specialty '{selected_specialty}'"
            for state, df in state_data:
                st.subheader(f"Providers in {state} {criterion}:")
                st.dataframe(df)
        else:
            st.write("No matching records found in text files for the selected specialty.")