Spaces:

Tufan1
/

CVD-BERT-Deterministic

Sleeping

File size: 11,685 Bytes

# app.py
import os
import streamlit as st
st.set_page_config(page_title="Cardio Disease Predictor", layout="wide")
import torch
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import speech_recognition as sr
from pydub import AudioSegment
import io
from audiorecorder import audiorecorder

# Configuration - Force CPU usage
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
DEVICE = torch.device("cpu")  # Force CPU only

# Load model and tokenizer with caching
@st.cache_resource
def load_model():
    model_name = "Tufan1/BioClinicalBERT-Cardio-Classifier-Fold-per1"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, 
        num_labels=1
    ).to(DEVICE)
    model.eval()
    return tokenizer, model

tokenizer, model = load_model()

# Medical mappings
MEDICAL_MAPPINGS = {
    'gender': {1: "Female", 2: "Male"},
    'cholesterol': {1: "Normal", 2: "Elevated", 3: "Peak"},
    'glucose': {1: "Normal", 2: "High", 3: "Extreme"},
    'binary': {0: "No", 1: "Yes"}
}

# Streamlit UI
#st.set_page_config(page_title="Cardio Disease Predictor", layout="wide")
st.title("🫀 Cardiovascular Disease Prediction")

def safe_extract(pattern, text, group=1, default=None):
    """Regex helper function"""
    match = re.search(pattern, text, re.IGNORECASE)
    return match.group(group).lower() if match else default

def text_to_features(text):
    """Extract features from text input"""
    patterns = {
        'age': r'(\d+)\s*(?:years?|yrs?|year-old)',
        'gender': r'\b(male|female|man|woman)\b',
        'height': r'(?:height|ht)\D*(\d+)\s*cm',
        'weight': r'(?:weight|wt)\D*(\d+)\s*kg',
        'bp': r'(?:blood pressure|bp)\D*(\d+)\s*/\s*(\d+)',
        'cholesterol': r'(cholest(?:erol)?)\s*(?:is|level)?\s*(normal|elevated|peak)',
        'glucose': r'(glucose|sugar)\s*(?:is|level)?\s*(normal|high|extreme)',
        'smoke': r'\b(smokes?|smoking|smoke)\b',
        'alco': r'\b(alcohol|drinks?)\b',
        'active': r'\b(active|exercise|exercises)\b'
    }
    
    features = {
        'age': int(safe_extract(patterns['age'], text, 1, '0')) or None,
        'gender': 2 if safe_extract(patterns['gender'], text, 1, '').lower() in ['male', 'man'] else 1,
        'height': int(safe_extract(patterns['height'], text, 1, '0')) or None,
        'weight': int(safe_extract(patterns['weight'], text, 1, '0')) or None,
        'ap_hi': None,
        'ap_lo': None,
        'cholesterol': 1,
        'glucose': 1,
        'smoke': 1 if safe_extract(patterns['smoke'], text) else 0,
        'alco': 1 if safe_extract(patterns['alco'], text) else 0,
        'active': 1 if safe_extract(patterns['active'], text) else 0
    }

    # BP handling
    bp_match = re.search(patterns['bp'], text, re.IGNORECASE)
    if bp_match and len(bp_match.groups()) == 2:
        features['ap_hi'], features['ap_lo'] = map(int, bp_match.groups())

    # Cholesterol/glucose mapping
    chol_value = safe_extract(patterns['cholesterol'], text, 2)
    if chol_value:
        features['cholesterol'] = {'normal':1, 'elevated':2, 'peak':3}.get(chol_value, 1)
    
    gluc_value = safe_extract(patterns['glucose'], text, 2)
    if gluc_value:
        features['glucose'] = {'normal':1, 'high':2, 'extreme':3}.get(gluc_value, 1)

    return features

def process_audio(audio_file):
    """Handle audio uploads"""
    try:
        audio = AudioSegment.from_file(audio_file)
        audio_io = io.BytesIO()
        audio.export(audio_io, format="wav")
        audio_io.seek(0)
        
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_io) as source:
            audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
            return text
    except Exception as e:
        st.error(f"Audio processing error: {str(e)}")
        return None

def predict(features):
    """Make prediction"""
    input_text = f"""Patient Record:
- Age: {features['age']} years
- Gender: {MEDICAL_MAPPINGS['gender'][features['gender']]}
- Height: {features['height']} cm
- Weight: {features['weight']} kg
- BP: {features['ap_hi']}/{features['ap_lo']} mmHg
- Cholesterol: {MEDICAL_MAPPINGS['cholesterol'][features['cholesterol']]}
- Glucose: {MEDICAL_MAPPINGS['glucose'][features['glucose']]}
- Smoke: {MEDICAL_MAPPINGS['binary'][features['smoke']]}
- Alco: {MEDICAL_MAPPINGS['binary'][features['alco']]}
- Active: {MEDICAL_MAPPINGS['binary'][features['active']]}"""

    inputs = tokenizer(
        input_text,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt"
    ).to(DEVICE)

    with torch.no_grad():
        logits = model(**inputs).logits
        prob = torch.sigmoid(logits).item()

    return "Cardiovascular Disease" if prob >= 0.5 else "No Cardiovascular Disease", round(prob*100, 2)

# Main UI
input_type = st.radio("Select input method:", ["Manual Input", "Text Description", "Audio Recording"])

if input_type == "Manual Input":
    with st.form("manual_input"):
        col1, col2 = st.columns(2)
        with col1:
            age = st.number_input("Age (years)", 10, 120)
            height = st.number_input("Height (cm)", 50, 250)
            ap_hi = st.number_input("Systolic BP", 80, 250)
            cholesterol = st.selectbox("Cholesterol", ["Normal", "Elevated", "Peak"])
            smoke = st.selectbox("Smoke", ["Yes", "No"])
            active = st.selectbox("Active", ["Yes", "No"])
        with col2:
            gender = st.selectbox("Gender", ["Female", "Male"])
            weight = st.number_input("Weight (kg)", 30, 300)
            ap_lo = st.number_input("Diastolic BP", 40, 150)
            glucose = st.selectbox("Glucose", ["Normal", "High", "Extreme"])
            alco = st.selectbox("Alco", ["Yes", "No"])
        
        submitted = st.form_submit_button("Predict")
        if submitted:
            features = {
                'age': age,
                'gender': 1 if gender == "Female" else 2,
                'height': height,
                'weight': weight,
                'ap_hi': ap_hi,
                'ap_lo': ap_lo,
                'cholesterol': ["Normal", "Elevated", "Peak"].index(cholesterol) + 1,
                'glucose': ["Normal", "High", "Extreme"].index(glucose) + 1,
                'smoke': 1 if smoke == "Yes" else 0,
                'alco': 1 if alco == "Yes" else 0,
                'active': 1 if active == "Yes" else 0,

            }
            prediction, confidence = predict(features)
            st.success(f"**Prediction:** {prediction} (Confidence: {confidence}%)")

elif input_type == "Text Description":
    text_input = st.text_area("Enter patient description:", height=150)
    if st.button("Analyze Text"):
        features = text_to_features(text_input)
        if all(v is not None for v in features.values()):
            prediction, confidence = predict(features)
            st.success(f"**Prediction:** {prediction} (Confidence: {confidence}%)")
        else:
            st.warning("Could not extract all required parameters from text")

elif input_type == "Audio Recording":
    st.subheader("🎙️ Record or Upload Audio")

    # Upload audio file
    audio_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "ogg"])

    # Record audio using mic (Streamlit extension)
    try:
        #from streamlit_audiorecorder import audiorecorder
        recorded_audio = audiorecorder("Start Recording", "Stop Recording")

        if recorded_audio is not None and len(recorded_audio) > 0:
            if st.button("Transcribe & Predict (Recorded Audio)"):
                try:
                    # If the recorder returns AudioSegment, convert to bytes
                    if isinstance(recorded_audio, AudioSegment):
                        audio_io = io.BytesIO()
                        recorded_audio.export(audio_io, format="wav")
                        audio_bytes = audio_io.getvalue()
                    else:
                        audio_bytes = recorded_audio

                    # Playback
                    st.audio(audio_bytes, format="audio/wav")

                    # Speech recognition
                    recognizer = sr.Recognizer()
                    with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
                        audio_data = recognizer.record(source)
                        text = recognizer.recognize_google(audio_data)
                        st.markdown("**Transcribed Text:**")
                        st.info(text)

                        features = text_to_features(text)
                        missing = [k for k, v in features.items() if v is None]

                        if missing:
                            st.warning(f"Missing fields: {', '.join(missing)}. Using default values.")
                            features.setdefault("age", 50)
                            features.setdefault("gender", 2)
                            features.setdefault("height", 165)
                            features.setdefault("weight", 70)
                            features.setdefault("ap_hi", 120)
                            features.setdefault("ap_lo", 80)
                            features.setdefault("cholesterol", 1)
                            features.setdefault("glucose", 1)
                            features.setdefault("smoke", 0)
                            features.setdefault("alco", 1)
                            features.setdefault("active", 1)

                        prediction, confidence = predict(features)
                        st.success(f"**Prediction:** {prediction} (Confidence: {confidence}%)")

                except Exception as e:
                    st.error(f"Recording Processing Error: {e}")

    except Exception as e:
        st.info("🎙️ Audio recording not supported in this environment.")

    # Uploaded audio
    if audio_file is not None:
        if st.button("Transcribe & Predict (Uploaded Audio)"):
            try:
                audio_bytes = audio_file.read()
                st.audio(audio_bytes, format=audio_file.type)

                recognizer = sr.Recognizer()
                with sr.AudioFile(io.BytesIO(audio_bytes)) as source:
                    audio_data = recognizer.record(source)
                    text = recognizer.recognize_google(audio_data)
                    st.markdown("**Transcribed Text:**")
                    st.info(text)

                    features = text_to_features(text)
                    missing = [k for k, v in features.items() if v is None]

                    if missing:
                        st.warning(f"Missing fields: {', '.join(missing)}. Using default values.")
                        features.setdefault("age", 50)
                        features.setdefault("gender", 2)
                        features.setdefault("height", 165)
                        features.setdefault("weight", 70)
                        features.setdefault("ap_hi", 120)
                        features.setdefault("ap_lo", 80)
                        features.setdefault("cholesterol", 1)
                        features.setdefault("glucose", 1)
                        features.setdefault("smoke", 0)
                        features.setdefault("alco", 1)
                        features.setdefault("active", 1)

                    prediction, confidence = predict(features)
                    st.success(f"**Prediction:** {prediction} (Confidence: {confidence}%)")

            except Exception as e:
                st.error(f"Uploaded Audio Error: {e}")