github-actions[bot]
		
	committed on
		
		
					Commit 
							
							·
						
						e9d74dc
	
1
								Parent(s):
							
							5f402a9
								
🤖 Auto-sync from GitHub 502b1dc4d7694ca52daf67481f49a16413e51144
Browse files- .github/workflows/deploy-to-hf.yml +13 -12
- hf-space/app.py +1 -1
- hf-space/hf-space/hf-space/.gitignore +6 -0
- hf-space/hf-space/hf-space/hf-space/.github/workflows/deploy-to-hf.yml +70 -0
- hf-space/hf-space/hf-space/hf-space/app.py +59 -0
- hf-space/hf-space/hf-space/hf-space/hf-space/README.md +22 -0
- hf-space/hf-space/hf-space/hf-space/requirements.txt +9 -0
- hf-space/hf-space/hf-space/hf-space/train_model.py +124 -0
- hf-space/hf-space/train_model.py +1 -1
    	
        .github/workflows/deploy-to-hf.yml
    CHANGED
    
    | @@ -9,6 +9,13 @@ jobs: | |
| 9 | 
             
              deploy:
         | 
| 10 | 
             
                runs-on: ubuntu-latest
         | 
| 11 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 12 | 
             
                steps:
         | 
| 13 | 
             
                  - name: π§© Checkout repository
         | 
| 14 | 
             
                    uses: actions/checkout@v4
         | 
| @@ -22,6 +29,7 @@ jobs: | |
| 22 | 
             
                    run: |
         | 
| 23 | 
             
                      pip install --upgrade pip
         | 
| 24 | 
             
                      pip install -r requirements.txt
         | 
|  | |
| 25 |  | 
| 26 | 
             
                  - name: π§  Train the healthcare model
         | 
| 27 | 
             
                    run: |
         | 
| @@ -34,10 +42,6 @@ jobs: | |
| 34 | 
             
                      git config --global user.name "github-actions[bot]"
         | 
| 35 |  | 
| 36 | 
             
                  - name: π Clone Hugging Face Space
         | 
| 37 | 
            -
                    env:
         | 
| 38 | 
            -
                      HF_USERNAME: "udaysankarjalli"
         | 
| 39 | 
            -
                      HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
         | 
| 40 | 
            -
                      SPACE_NAME: "healthcare-disease-predictor"
         | 
| 41 | 
             
                    run: |
         | 
| 42 | 
             
                      if [ -z "$HF_TOKEN" ]; then
         | 
| 43 | 
             
                        echo "β ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
         | 
| @@ -46,25 +50,22 @@ jobs: | |
| 46 | 
             
                      echo "πΉ Cloning Hugging Face Space..."
         | 
| 47 | 
             
                      git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME hf-space
         | 
| 48 |  | 
| 49 | 
            -
                  
         | 
| 50 | 
            -
                  - name: π Sync files to Hugging Face Space
         | 
| 51 | 
             
                    run: |
         | 
| 52 | 
             
                      echo "πΉ Syncing files to Hugging Face Space..."
         | 
| 53 | 
            -
                      # Exclude .git and model folder
         | 
| 54 | 
             
                      rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
         | 
| 55 | 
             
                      cd hf-space
         | 
| 56 | 
             
                      git add .
         | 
| 57 | 
             
                      git commit -m "π€ Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
         | 
| 58 |  | 
| 59 | 
            -
             | 
| 60 | 
            -
                  - name: π€ Push to Hugging Face
         | 
| 61 | 
             
                    run: |
         | 
| 62 | 
            -
                      echo "πΉ Pushing latest changes to Hugging Face..."
         | 
| 63 | 
             
                      cd hf-space
         | 
| 64 | 
             
                      git push origin main || { echo "β Push failed. Check your Hugging Face token permissions."; exit 1; }
         | 
| 65 | 
            -
                      
         | 
| 66 |  | 
| 67 | 
             
                  - name: ✅ Summary
         | 
| 68 | 
             
                    run: |
         | 
| 69 | 
             
                      echo "π Deployment completed successfully!"
         | 
| 70 | 
            -
                      echo " | 
|  | 
|  | |
| 9 | 
             
              deploy:
         | 
| 10 | 
             
                runs-on: ubuntu-latest
         | 
| 11 |  | 
| 12 | 
            +
                # ✅ Global environment variables
         | 
| 13 | 
            +
                env:
         | 
| 14 | 
            +
                  HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}   # GitHub Actions secret
         | 
| 15 | 
            +
                  HF_USERNAME: "udaysankarjalli"
         | 
| 16 | 
            +
                  MODEL_REPO: "healthcare-disease-predictor-model"  # Separate repo for large model files
         | 
| 17 | 
            +
                  SPACE_NAME: "healthcare-disease-predictor"
         | 
| 18 | 
            +
             | 
| 19 | 
             
                steps:
         | 
| 20 | 
             
                  - name: π§© Checkout repository
         | 
| 21 | 
             
                    uses: actions/checkout@v4
         | 
|  | |
| 29 | 
             
                    run: |
         | 
| 30 | 
             
                      pip install --upgrade pip
         | 
| 31 | 
             
                      pip install -r requirements.txt
         | 
| 32 | 
            +
                      pip install huggingface-hub
         | 
| 33 |  | 
| 34 | 
             
                  - name: π§  Train the healthcare model
         | 
| 35 | 
             
                    run: |
         | 
|  | |
| 42 | 
             
                      git config --global user.name "github-actions[bot]"
         | 
| 43 |  | 
| 44 | 
             
                  - name: π Clone Hugging Face Space
         | 
|  | |
|  | |
|  | |
|  | |
| 45 | 
             
                    run: |
         | 
| 46 | 
             
                      if [ -z "$HF_TOKEN" ]; then
         | 
| 47 | 
             
                        echo "β ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
         | 
|  | |
| 50 | 
             
                      echo "πΉ Cloning Hugging Face Space..."
         | 
| 51 | 
             
                      git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME hf-space
         | 
| 52 |  | 
| 53 | 
            +
                  - name: π Sync app code to Hugging Face Space
         | 
|  | |
| 54 | 
             
                    run: |
         | 
| 55 | 
             
                      echo "πΉ Syncing files to Hugging Face Space..."
         | 
| 56 | 
            +
                      # Exclude .git and model folder (avoid large files)
         | 
| 57 | 
             
                      rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
         | 
| 58 | 
             
                      cd hf-space
         | 
| 59 | 
             
                      git add .
         | 
| 60 | 
             
                      git commit -m "π€ Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
         | 
| 61 |  | 
| 62 | 
            +
                  - name: π€ Push app code to Hugging Face Space
         | 
|  | |
| 63 | 
             
                    run: |
         | 
|  | |
| 64 | 
             
                      cd hf-space
         | 
| 65 | 
             
                      git push origin main || { echo "β Push failed. Check your Hugging Face token permissions."; exit 1; }
         | 
|  | |
| 66 |  | 
| 67 | 
             
                  - name: ✅ Summary
         | 
| 68 | 
             
                    run: |
         | 
| 69 | 
             
                      echo "π Deployment completed successfully!"
         | 
| 70 | 
            +
                      echo "App is live: https://huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME"
         | 
| 71 | 
            +
                      echo "Model uploaded to HF Model Repo: https://huggingface.co/$HF_USERNAME/$MODEL_REPO"
         | 
    	
        hf-space/app.py
    CHANGED
    
    | @@ -8,7 +8,7 @@ from huggingface_hub import hf_hub_download | |
| 8 | 
             
            # -----------------------------
         | 
| 9 | 
             
            # Step 1: Download model from Hugging Face Hub
         | 
| 10 | 
             
            # ----------------------------
         | 
| 11 | 
            -
            HF_TOKEN = os.environ.get(" | 
| 12 | 
             
            REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
         | 
| 13 | 
             
            MODEL_FILENAME = "healthcare_model.joblib"
         | 
| 14 |  | 
|  | |
| 8 | 
             
            # -----------------------------
         | 
| 9 | 
             
            # Step 1: Download model from Hugging Face Hub
         | 
| 10 | 
             
            # ----------------------------
         | 
| 11 | 
            +
            HF_TOKEN = os.environ.get("github_actions_deploy_healthcare")# GitHub Actions or Space secret
         | 
| 12 | 
             
            REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
         | 
| 13 | 
             
            MODEL_FILENAME = "healthcare_model.joblib"
         | 
| 14 |  | 
    	
        hf-space/hf-space/hf-space/.gitignore
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Ignore model artifacts
         | 
| 2 | 
            +
            model/
         | 
| 3 | 
            +
            *.joblib
         | 
| 4 | 
            +
            *.pkl
         | 
| 5 | 
            +
            *.npy
         | 
| 6 | 
            +
            *.csv
         | 
    	
        hf-space/hf-space/hf-space/hf-space/.github/workflows/deploy-to-hf.yml
    ADDED
    
    | @@ -0,0 +1,70 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            name: Deploy Healthcare Model to Hugging Face
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            on:
         | 
| 4 | 
            +
              push:
         | 
| 5 | 
            +
                branches:
         | 
| 6 | 
            +
                  - main
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            jobs:
         | 
| 9 | 
            +
              deploy:
         | 
| 10 | 
            +
                runs-on: ubuntu-latest
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                steps:
         | 
| 13 | 
            +
                  - name: π§© Checkout repository
         | 
| 14 | 
            +
                    uses: actions/checkout@v4
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                  - name: π Set up Python
         | 
| 17 | 
            +
                    uses: actions/setup-python@v4
         | 
| 18 | 
            +
                    with:
         | 
| 19 | 
            +
                      python-version: "3.10"
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                  - name: π¦ Install dependencies
         | 
| 22 | 
            +
                    run: |
         | 
| 23 | 
            +
                      pip install --upgrade pip
         | 
| 24 | 
            +
                      pip install -r requirements.txt
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                  - name: π§  Train the healthcare model
         | 
| 27 | 
            +
                    run: |
         | 
| 28 | 
            +
                      echo "Starting model training..."
         | 
| 29 | 
            +
                      python train_model.py
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                  - name: βοΈ Set up Git identity
         | 
| 32 | 
            +
                    run: |
         | 
| 33 | 
            +
                      git config --global user.email "github-actions[bot]@users.noreply.github.com"
         | 
| 34 | 
            +
                      git config --global user.name "github-actions[bot]"
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                  - name: π Clone Hugging Face Space
         | 
| 37 | 
            +
                    env:
         | 
| 38 | 
            +
                      HF_USERNAME: "udaysankarjalli"
         | 
| 39 | 
            +
                      HF_TOKEN: ${{ secrets.HG_HEALTHCAREDATA }}
         | 
| 40 | 
            +
                      SPACE_NAME: "healthcare-disease-predictor"
         | 
| 41 | 
            +
                    run: |
         | 
| 42 | 
            +
                      if [ -z "$HF_TOKEN" ]; then
         | 
| 43 | 
            +
                        echo "β ERROR: Hugging Face token not found. Please add 'HG_HEALTHCAREDATA' secret in your repo settings."
         | 
| 44 | 
            +
                        exit 1
         | 
| 45 | 
            +
                      fi
         | 
| 46 | 
            +
                      echo "πΉ Cloning Hugging Face Space..."
         | 
| 47 | 
            +
                      git clone https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME hf-space
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                  
         | 
| 50 | 
            +
                  - name: π Sync files to Hugging Face Space
         | 
| 51 | 
            +
                    run: |
         | 
| 52 | 
            +
                      echo "πΉ Syncing files to Hugging Face Space..."
         | 
| 53 | 
            +
                      # Exclude .git and model folder
         | 
| 54 | 
            +
                      rsync -av --delete --exclude='.git' --exclude='model/' ./ hf-space/ || [ $? -eq 24 ]
         | 
| 55 | 
            +
                      cd hf-space
         | 
| 56 | 
            +
                      git add .
         | 
| 57 | 
            +
                      git commit -m "π€ Auto-sync from GitHub $GITHUB_SHA" || echo "No changes to commit"
         | 
| 58 | 
            +
             | 
| 59 | 
            +
             | 
| 60 | 
            +
                  - name: π€ Push to Hugging Face
         | 
| 61 | 
            +
                    run: |
         | 
| 62 | 
            +
                      echo "πΉ Pushing latest changes to Hugging Face..."
         | 
| 63 | 
            +
                      cd hf-space
         | 
| 64 | 
            +
                      git push origin main || { echo "β Push failed. Check your Hugging Face token permissions."; exit 1; }
         | 
| 65 | 
            +
                      
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                  - name: ✅ Summary
         | 
| 68 | 
            +
                    run: |
         | 
| 69 | 
            +
                      echo "π Deployment completed successfully!"
         | 
| 70 | 
            +
                      echo "Your model and app are now live on Hugging Face: https://huggingface.co/spaces/udaysankarjalli/healthcare-disease-predictor"
         | 
    	
        hf-space/hf-space/hf-space/hf-space/app.py
    ADDED
    
    | @@ -0,0 +1,59 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gradio as gr
         | 
| 2 | 
            +
            import joblib
         | 
| 3 | 
            +
            import pandas as pd
         | 
| 4 | 
            +
            import numpy as np
         | 
| 5 | 
            +
            import os
         | 
| 6 | 
            +
            from huggingface_hub import hf_hub_download
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # -----------------------------
         | 
| 9 | 
            +
            # Step 1: Download model from Hugging Face Hub
         | 
| 10 | 
            +
            # ----------------------------
         | 
| 11 | 
            +
            HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Actions or Space secret
         | 
| 12 | 
            +
            REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"
         | 
| 13 | 
            +
            MODEL_FILENAME = "healthcare_model.joblib"
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            try:
         | 
| 16 | 
            +
                model_path = hf_hub_download(
         | 
| 17 | 
            +
                    repo_id=REPO_ID,
         | 
| 18 | 
            +
                    filename=MODEL_FILENAME,
         | 
| 19 | 
            +
                    token=HF_TOKEN
         | 
| 20 | 
            +
                )
         | 
| 21 | 
            +
                print(f"✅ Model downloaded successfully: {model_path}")
         | 
| 22 | 
            +
            except Exception as e:
         | 
| 23 | 
            +
                raise FileNotFoundError(f"Failed to download model from HF Hub: {e}")
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            # Load the trained pipeline
         | 
| 26 | 
            +
            pipe = joblib.load(model_path)
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            # ----------------------------
         | 
| 29 | 
            +
            # Step 2: Prediction function
         | 
| 30 | 
            +
            # ----------------------------
         | 
| 31 | 
            +
            def predict_top_k(symptoms_text, duration_days, severity):
         | 
| 32 | 
            +
                row = {
         | 
| 33 | 
            +
                    'symptoms_text': symptoms_text,
         | 
| 34 | 
            +
                    'duration_days_reported': duration_days,
         | 
| 35 | 
            +
                    'severity_level': severity
         | 
| 36 | 
            +
                }
         | 
| 37 | 
            +
                X = pd.DataFrame([row])
         | 
| 38 | 
            +
                proba = pipe.predict_proba(X)[0]
         | 
| 39 | 
            +
                classes = pipe.classes_
         | 
| 40 | 
            +
                idx = np.argsort(proba)[::-1][:3]
         | 
| 41 | 
            +
                return [{'disease': classes[i], 'probability': float(proba[i])} for i in idx]
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            # ----------------------------
         | 
| 44 | 
            +
            # Step 3: Gradio Interface
         | 
| 45 | 
            +
            # ----------------------------
         | 
| 46 | 
            +
            iface = gr.Interface(
         | 
| 47 | 
            +
                fn=predict_top_k,
         | 
| 48 | 
            +
                inputs=[
         | 
| 49 | 
            +
                    gr.Textbox(label="Symptoms Text"),
         | 
| 50 | 
            +
                    gr.Number(label="Duration (days)"),
         | 
| 51 | 
            +
                    gr.Dropdown(label="Severity Level", choices=['mild', 'moderate', 'severe'], value='mild')
         | 
| 52 | 
            +
                ],
         | 
| 53 | 
            +
                outputs=gr.JSON(label="Top 3 Predicted Diseases"),
         | 
| 54 | 
            +
                title="🩺 Healthcare Disease Prediction",
         | 
| 55 | 
            +
                description="Enter symptoms and details to get top disease predictions."
         | 
| 56 | 
            +
            )
         | 
| 57 | 
            +
             | 
| 58 | 
            +
            if __name__ == "__main__":
         | 
| 59 | 
            +
                iface.launch()
         | 
    	
        hf-space/hf-space/hf-space/hf-space/hf-space/README.md
    ADDED
    
    | @@ -0,0 +1,22 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            title: Healthcare Disease Predictor
         | 
| 3 | 
            +
            emoji: 🩺
         | 
| 4 | 
            +
            colorFrom: green
         | 
| 5 | 
            +
            colorTo: blue
         | 
| 6 | 
            +
            sdk: gradio
         | 
| 7 | 
            +
            sdk_version: "5.47.0"
         | 
| 8 | 
            +
            app_file: app.py
         | 
| 9 | 
            +
            pinned: false
         | 
| 10 | 
            +
            ---
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            ## 🧠 Features
         | 
| 13 | 
            +
            - Uses real healthcare dataset (`health_chatbot_structured_features.csv`)
         | 
| 14 | 
            +
            - Trains RandomForestClassifier with TF-IDF + OneHot + Median Imputation pipeline
         | 
| 15 | 
            +
            - Auto-saves `.joblib` model
         | 
| 16 | 
            +
            - Interactive Gradio interface for disease prediction
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            ## π CI/CD with GitHub Actions
         | 
| 19 | 
            +
            Whenever you push changes, GitHub Actions automatically:
         | 
| 20 | 
            +
            1. Runs `train_model.py`
         | 
| 21 | 
            +
            2. Saves trained model (`.joblib`)
         | 
| 22 | 
            +
            3. Pushes the updated model and app to your Hugging Face Space
         | 
    	
        hf-space/hf-space/hf-space/hf-space/requirements.txt
    ADDED
    
    | @@ -0,0 +1,9 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            pandas
         | 
| 2 | 
            +
            numpy
         | 
| 3 | 
            +
            scikit-learn
         | 
| 4 | 
            +
            joblib
         | 
| 5 | 
            +
            gradio==5.47.0
         | 
| 6 | 
            +
            fastapi
         | 
| 7 | 
            +
            uvicorn
         | 
| 8 | 
            +
            pydantic
         | 
| 9 | 
            +
            starlette
         | 
    	
        hf-space/hf-space/hf-space/hf-space/train_model.py
    ADDED
    
    | @@ -0,0 +1,124 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import warnings
         | 
| 2 | 
            +
            warnings.filterwarnings('ignore')
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            import pandas as pd, numpy as np
         | 
| 5 | 
            +
            from pathlib import Path
         | 
| 6 | 
            +
            from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
         | 
| 7 | 
            +
            from sklearn.pipeline import Pipeline
         | 
| 8 | 
            +
            from sklearn.compose import ColumnTransformer
         | 
| 9 | 
            +
            from sklearn.feature_extraction.text import TfidfVectorizer
         | 
| 10 | 
            +
            from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
         | 
| 11 | 
            +
            from sklearn.impute import SimpleImputer
         | 
| 12 | 
            +
            from sklearn.ensemble import RandomForestClassifier
         | 
| 13 | 
            +
            from sklearn.metrics import classification_report, accuracy_score
         | 
| 14 | 
            +
            import joblib
         | 
| 15 | 
            +
            import os
         | 
| 16 | 
            +
            from huggingface_hub import HfApi
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            # ----------------------------
         | 
| 19 | 
            +
            # Step 1: Load dataset
         | 
| 20 | 
            +
            # ----------------------------
         | 
| 21 | 
            +
            df = pd.read_csv("health_chatbot_structured_features.csv")
         | 
| 22 | 
            +
            print("Data shape:", df.shape)
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            # ----------------------------
         | 
| 25 | 
            +
            # Step 2: Define features & target
         | 
| 26 | 
            +
            # ----------------------------
         | 
| 27 | 
            +
            features = ['symptoms_text', 'duration_days_reported', 'severity_level']
         | 
| 28 | 
            +
            target = 'disease_label'
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df[target])
         | 
| 31 | 
            +
            print("Train:", train_df.shape, " Test:", test_df.shape)
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            # ----------------------------
         | 
| 34 | 
            +
            # Step 3: Preprocessing setup
         | 
| 35 | 
            +
            # ----------------------------
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            def flatten_text(x):
         | 
| 38 | 
            +
                return x.ravel()
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            numeric_features = ['duration_days_reported']
         | 
| 41 | 
            +
            numeric_transformer = SimpleImputer(strategy='median')
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            categorical_features = ['severity_level']
         | 
| 44 | 
            +
            categorical_transformer = Pipeline([
         | 
| 45 | 
            +
                ('imputer', SimpleImputer(strategy='most_frequent')),
         | 
| 46 | 
            +
                ('onehot', OneHotEncoder(handle_unknown='ignore'))
         | 
| 47 | 
            +
            ])
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            text_feature = 'symptoms_text'
         | 
| 50 | 
            +
            text_transformer = Pipeline([
         | 
| 51 | 
            +
                ('imputer', SimpleImputer(strategy='constant', fill_value='')),
         | 
| 52 | 
            +
                ('flatten', FunctionTransformer(flatten_text, validate=False)),
         | 
| 53 | 
            +
                ('tfidf', TfidfVectorizer(ngram_range=(1,2), max_df=0.95))
         | 
| 54 | 
            +
            ])
         | 
| 55 | 
            +
             | 
| 56 | 
            +
            preprocessor = ColumnTransformer([
         | 
| 57 | 
            +
                ('num', numeric_transformer, numeric_features),
         | 
| 58 | 
            +
                ('cat', categorical_transformer, categorical_features),
         | 
| 59 | 
            +
                ('text', text_transformer, [text_feature])
         | 
| 60 | 
            +
            ])
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            # ----------------------------
         | 
| 63 | 
            +
            # Step 4: Model Pipeline
         | 
| 64 | 
            +
            # ----------------------------
         | 
| 65 | 
            +
            pipe = Pipeline([
         | 
| 66 | 
            +
                ('preprocessor', preprocessor),
         | 
| 67 | 
            +
                ('clf', RandomForestClassifier(n_estimators=300, random_state=42, n_jobs=-1))
         | 
| 68 | 
            +
            ])
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            # ----------------------------
         | 
| 71 | 
            +
            # Step 5: Train & Evaluate
         | 
| 72 | 
            +
            # ----------------------------
         | 
| 73 | 
            +
            cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
         | 
| 74 | 
            +
            scores = cross_val_score(pipe, train_df[features], train_df[target], cv=cv, scoring='accuracy', n_jobs=-1)
         | 
| 75 | 
            +
            print(f"\nCV accuracy (mean ± std): {scores.mean():.4f} ± {scores.std():.4f}")
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            pipe.fit(train_df[features], train_df[target])
         | 
| 78 | 
            +
            preds = pipe.predict(test_df[features])
         | 
| 79 | 
            +
            print("\nTest accuracy:", accuracy_score(test_df[target], preds))
         | 
| 80 | 
            +
            print("\nClassification report:\n", classification_report(test_df[target], preds))
         | 
| 81 | 
            +
             | 
| 82 | 
            +
            # ----------------------------
         | 
| 83 | 
            +
            # Step 6: Save model artifacts
         | 
| 84 | 
            +
            # ----------------------------
         | 
| 85 | 
            +
            Path("model").mkdir(exist_ok=True)
         | 
| 86 | 
            +
             | 
| 87 | 
            +
            model_path = "model/healthcare_model.joblib"
         | 
| 88 | 
            +
            train_path = "model/train_data.csv"
         | 
| 89 | 
            +
            test_path = "model/test_data.csv"
         | 
| 90 | 
            +
             | 
| 91 | 
            +
            joblib.dump(pipe, model_path)
         | 
| 92 | 
            +
            train_df.to_csv(train_path, index=False)
         | 
| 93 | 
            +
            test_df.to_csv(test_path, index=False)
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            print("\n✅ Model and data saved successfully:")
         | 
| 96 | 
            +
            print(f"   Model  β {model_path}")
         | 
| 97 | 
            +
            print(f"   Train  β {train_path}")
         | 
| 98 | 
            +
            print(f"   Test   β {test_path}")
         | 
| 99 | 
            +
             | 
| 100 | 
            +
            # ----------------------------
         | 
| 101 | 
            +
            # Step 7: Upload model to Hugging Face Hub
         | 
| 102 | 
            +
            # ----------------------------
         | 
| 103 | 
            +
            HF_TOKEN = os.environ.get("HF_TOKEN")  # GitHub Action secret
         | 
| 104 | 
            +
            REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
         | 
| 105 | 
            +
             | 
| 106 | 
            +
            if HF_TOKEN:
         | 
| 107 | 
            +
                api = HfApi()
         | 
| 108 | 
            +
                # Upload model
         | 
| 109 | 
            +
                api.upload_file(path_or_fileobj=model_path,
         | 
| 110 | 
            +
                                path_in_repo="healthcare_model.joblib",
         | 
| 111 | 
            +
                                repo_id=REPO_ID,
         | 
| 112 | 
            +
                                token=HF_TOKEN)
         | 
| 113 | 
            +
                # Upload train/test CSVs (optional)
         | 
| 114 | 
            +
                api.upload_file(path_or_fileobj=train_path,
         | 
| 115 | 
            +
                                path_in_repo="train_data.csv",
         | 
| 116 | 
            +
                                repo_id=REPO_ID,
         | 
| 117 | 
            +
                                token=HF_TOKEN)
         | 
| 118 | 
            +
                api.upload_file(path_or_fileobj=test_path,
         | 
| 119 | 
            +
                                path_in_repo="test_data.csv",
         | 
| 120 | 
            +
                                repo_id=REPO_ID,
         | 
| 121 | 
            +
                                token=HF_TOKEN)
         | 
| 122 | 
            +
                print("✅ Model and data uploaded successfully to Hugging Face Hub!")
         | 
| 123 | 
            +
            else:
         | 
| 124 | 
            +
                print("β οΈ HF_TOKEN not found. Skipping upload.")
         | 
    	
        hf-space/hf-space/train_model.py
    CHANGED
    
    | @@ -100,7 +100,7 @@ print(f"   Test   β {test_path}") | |
| 100 | 
             
            # ----------------------------
         | 
| 101 | 
             
            # Step 7: Upload model to Hugging Face Hub
         | 
| 102 | 
             
            # ----------------------------
         | 
| 103 | 
            -
            HF_TOKEN = os.environ.get(" | 
| 104 | 
             
            REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
         | 
| 105 |  | 
| 106 | 
             
            if HF_TOKEN:
         | 
|  | |
| 100 | 
             
            # ----------------------------
         | 
| 101 | 
             
            # Step 7: Upload model to Hugging Face Hub
         | 
| 102 | 
             
            # ----------------------------
         | 
| 103 | 
            +
            HF_TOKEN = os.environ.get("HG_HEALTHCAREDATA")   # GitHub Action secret
         | 
| 104 | 
             
            REPO_ID = "udaysankarjalli/healthcare-disease-predictor-model"  # separate repo for large model files
         | 
| 105 |  | 
| 106 | 
             
            if HF_TOKEN:
         |