"""Streamlit app that suggests a likely disease from a free-text symptom description.

A TF-IDF + Multinomial Naive Bayes classifier is trained on the ``text`` and
``label`` columns of ``Symptom2Disease.csv`` and queried with whatever the
user types into the text area.
"""

import re

import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Location of the symptom/disease training data, relative to the working directory.
DATA_PATH = "Symptom2Disease.csv"


def preprocess_input(user_input):
    """Return *user_input* lowercased, with only letters kept and spaces collapsed.

    The same normalisation is applied to the training text and to the text the
    user enters, so both sides of the model see identically shaped tokens.
    """
    user_input = user_input.lower()  # Convert to lowercase
    user_input = re.sub(r"[^a-zA-Z\s]", "", user_input)  # Remove special characters and numbers
    return " ".join(user_input.split())  # Remove extra spaces


@st.cache_resource
def load_model(csv_path=DATA_PATH):
    """Read the dataset and return the fitted ``(vectorizer, classifier)`` pair.

    Streamlit re-executes the whole script on every widget interaction; caching
    this function keeps the CSV read and the model fit from being repeated on
    each rerun.

    Raises:
        FileNotFoundError: if *csv_path* does not exist.
    """
    data = pd.read_csv(csv_path)

    # Fit the vocabulary on text normalised exactly like the user input will be,
    # otherwise tokens such as "dont" (from "don't") never match the training data.
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(data["text"].map(preprocess_input))

    # The 80/20 split is kept so the classifier is trained on the same subset as
    # before; the held-out part is not evaluated anywhere in this script.
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, data["label"], test_size=0.2, random_state=42
    )

    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)
    return vectorizer, classifier


try:
    tfidf_vectorizer, model = load_model()
except FileNotFoundError:
    # Without the dataset there is nothing to predict with; show a readable
    # message instead of a traceback and halt this script run.
    st.error(f"Training data file '{DATA_PATH}' was not found.")
    st.stop()


def predict_diseases(user_clean_text):
    """Return the model's predicted label(s) for already-cleaned symptom text.

    The result is the array produced by ``model.predict`` for a single
    document, i.e. it holds one predicted label.
    """
    user_input_vector = tfidf_vectorizer.transform([user_clean_text])  # Vectorize the cleaned user input
    return model.predict(user_input_vector)  # Make predictions using the trained model


# Set Streamlit app title with emojis
st.title("Health Symptom-to-Disease Predictor 🏥👨‍⚕️")

# Define a sidebar
st.sidebar.title("Tool Definition")
st.sidebar.markdown("This tool helps you identify possible diseases based on the symptoms you provide.")
st.sidebar.markdown("the tool may aid healthcare professionals in the initial assessment of potential conditions, facilitating quicker decision-making and improving patient care")

st.sidebar.title("⚠️ **Limitation**")
st.sidebar.markdown("This tool's predictions are based solely on symptom descriptions and may not account for other critical factors,")
st.sidebar.markdown("such as a patient's medical history or laboratory tests,")
st.sidebar.markdown("As such,it should be used as an initial reference and not as a sole diagnostic tool. 👩‍⚕️")

st.warning("Please note that this tool is for informational purposes only. Always consult a healthcare professional for accurate medical advice.")

show_faqs = st.sidebar.checkbox("Show FAQs")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Add user input section
user_input = st.text_area("Enter your symptoms (how you feel):", key="user_input")

# Add button to predict disease
if st.button("Predict Disease"):
    # Display loading message
    with st.spinner("Diagnosing patient..."):
        # Validate the *cleaned* text: input made only of spaces, digits or
        # punctuation cleans to "" and would otherwise be scored as an
        # all-zero vector, yielding a meaningless prediction.
        cleaned_input = preprocess_input(user_input or "")
        if cleaned_input:
            predicted_diseases = predict_diseases(cleaned_input)

            # Record the exchange in the session's chat history
            st.session_state.messages.append({"role": "user", "content": user_input})
            st.session_state.messages.append({"role": "assistant", "content": f"Based on your symptoms, you might have {', '.join(predicted_diseases)}."})

            # Display predicted diseases
            st.write("Based on your symptoms, you might have:")
            for disease in predicted_diseases:
                st.write(f"- {disease}")
        else:
            st.warning("Please enter your symptoms before predicting.")

# Create FAQs section
if show_faqs:
    st.markdown("## Frequently Asked Questions")
    st.markdown("**Q: How does this tool work?**")
    st.markdown("A: The tool uses a machine learning model to analyze the symptoms you enter and predicts possible diseases based on a pre-trained dataset.")
    st.markdown("**Q: Is this a substitute for a doctor's advice?**")
    st.markdown("A: No, this tool is for informational purposes only. It's essential to consult a healthcare professional for accurate medical advice.")
    st.markdown("**Q: Can I trust the predictions?**")
    st.markdown("A: While the tool provides predictions, it's not a guarantee of accuracy. It's always best to consult a healthcare expert for a reliable diagnosis.")

# Add attribution
st.markdown("Created with ❤️ by Joas")