Justin Zhang committed on
Commit
cb49ee4
·
1 Parent(s): 683cd11

Update executed notebook and static HTML export with outputs

Browse files
notebooks/01_data_exploration.html CHANGED
@@ -7526,10 +7526,10 @@ a.anchor-link {
7526
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7527
  </div>
7528
  <div class="jp-InputArea jp-Cell-inputArea">
7529
- <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
7530
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7531
  <div class="cm-editor cm-s-jupyter">
7532
- <div class="highlight hl-python"><pre><span></span><span class="c1"># Import libraries</span>
7533
  <span class="kn">import</span><span class="w"> </span><span class="nn">pandas</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pd</span>
7534
  <span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.pyplot</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">plt</span>
7535
  <span class="kn">import</span><span class="w"> </span><span class="nn">seaborn</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">sns</span>
@@ -7538,15 +7538,15 @@ a.anchor-link {
7538
  </div>
7539
  </div>
7540
  </div>
7541
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs" id="cell-id=48479ace">
7542
  <div class="jp-Cell-inputWrapper" tabindex="0">
7543
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7544
  </div>
7545
  <div class="jp-InputArea jp-Cell-inputArea">
7546
- <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
7547
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7548
  <div class="cm-editor cm-s-jupyter">
7549
- <div class="highlight hl-python"><pre><span></span><span class="c1"># Load synthetic data</span>
7550
  <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">'../data/synthetic_oncology_patients.csv'</span><span class="p">)</span>
7551
  <span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
7552
  </pre></div>
@@ -7554,6 +7554,116 @@ a.anchor-link {
7554
  </div>
7555
  </div>
7556
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7557
  </div>
7558
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=ba0324c5">
7559
  <div class="jp-Cell-inputWrapper" tabindex="0">
@@ -7565,21 +7675,137 @@ a.anchor-link {
7565
  </div>
7566
  </div>
7567
  </div>
7568
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs" id="cell-id=525868d7">
7569
  <div class="jp-Cell-inputWrapper" tabindex="0">
7570
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7571
  </div>
7572
  <div class="jp-InputArea jp-Cell-inputArea">
7573
- <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
7574
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7575
  <div class="cm-editor cm-s-jupyter">
7576
- <div class="highlight hl-python"><pre><span></span><span class="c1"># Demographic summary</span>
7577
  <span class="n">df</span><span class="p">[[</span><span class="s1">'age'</span><span class="p">,</span> <span class="s1">'gender'</span><span class="p">,</span> <span class="s1">'cancer_type'</span><span class="p">,</span> <span class="s1">'stage'</span><span class="p">]]</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="s1">'all'</span><span class="p">)</span>
7578
  </pre></div>
7579
  </div>
7580
  </div>
7581
  </div>
7582
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7583
  </div>
7584
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=f872f204">
7585
  <div class="jp-Cell-inputWrapper" tabindex="0">
@@ -7591,15 +7817,15 @@ a.anchor-link {
7591
  </div>
7592
  </div>
7593
  </div>
7594
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs" id="cell-id=8b89480d">
7595
  <div class="jp-Cell-inputWrapper" tabindex="0">
7596
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7597
  </div>
7598
  <div class="jp-InputArea jp-Cell-inputArea">
7599
- <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
7600
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7601
  <div class="cm-editor cm-s-jupyter">
7602
- <div class="highlight hl-python"><pre><span></span><span class="n">sns</span><span class="o">.</span><span class="n">countplot</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="n">df</span><span class="p">,</span> <span class="n">x</span><span class="o">=</span><span class="s1">'cancer_type'</span><span class="p">,</span> <span class="n">hue</span><span class="o">=</span><span class="s1">'gender'</span><span class="p">)</span>
7603
  <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s1">'Cancer Type by Gender'</span><span class="p">)</span>
7604
  <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
7605
  </pre></div>
@@ -7607,6 +7833,18 @@ a.anchor-link {
7607
  </div>
7608
  </div>
7609
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
7610
  </div>
7611
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=d158309e">
7612
  <div class="jp-Cell-inputWrapper" tabindex="0">
@@ -7618,20 +7856,196 @@ a.anchor-link {
7618
  </div>
7619
  </div>
7620
  </div>
7621
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs" id="cell-id=a878a045">
7622
  <div class="jp-Cell-inputWrapper" tabindex="0">
7623
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7624
  </div>
7625
  <div class="jp-InputArea jp-Cell-inputArea">
7626
- <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
7627
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7628
  <div class="cm-editor cm-s-jupyter">
7629
- <div class="highlight hl-python"><pre><span></span><span class="n">pd</span><span class="o">.</span><span class="n">crosstab</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'biomarker_status'</span><span class="p">],</span> <span class="n">df</span><span class="p">[</span><span class="s1">'treatment'</span><span class="p">])</span>
7630
  </pre></div>
7631
  </div>
7632
  </div>
7633
  </div>
7634
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7635
  </div>
7636
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=50dd531c">
7637
  <div class="jp-Cell-inputWrapper" tabindex="0">
@@ -7643,15 +8057,15 @@ a.anchor-link {
7643
  </div>
7644
  </div>
7645
  </div>
7646
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs" id="cell-id=8a9cf12f">
7647
  <div class="jp-Cell-inputWrapper" tabindex="0">
7648
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7649
  </div>
7650
  <div class="jp-InputArea jp-Cell-inputArea">
7651
- <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
7652
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7653
  <div class="cm-editor cm-s-jupyter">
7654
- <div class="highlight hl-python"><pre><span></span><span class="n">sns</span><span class="o">.</span><span class="n">histplot</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'survival_months'</span><span class="p">],</span> <span class="n">bins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">kde</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
7655
  <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s1">'Survival (months)'</span><span class="p">)</span>
7656
  <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s1">'Distribution of Survival Time'</span><span class="p">)</span>
7657
  <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
@@ -7660,6 +8074,18 @@ a.anchor-link {
7660
  </div>
7661
  </div>
7662
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
7663
  </div>
7664
  </main>
7665
  </body>
 
7526
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7527
  </div>
7528
  <div class="jp-InputArea jp-Cell-inputArea">
7529
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [1]:</div>
7530
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7531
  <div class="cm-editor cm-s-jupyter">
7532
+ <div class="highlight hl-ipython3"><pre><span></span><span class="c1"># Import libraries</span>
7533
  <span class="kn">import</span><span class="w"> </span><span class="nn">pandas</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pd</span>
7534
  <span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.pyplot</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">plt</span>
7535
  <span class="kn">import</span><span class="w"> </span><span class="nn">seaborn</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">sns</span>
 
7538
  </div>
7539
  </div>
7540
  </div>
7541
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell" id="cell-id=48479ace">
7542
  <div class="jp-Cell-inputWrapper" tabindex="0">
7543
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7544
  </div>
7545
  <div class="jp-InputArea jp-Cell-inputArea">
7546
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [2]:</div>
7547
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7548
  <div class="cm-editor cm-s-jupyter">
7549
+ <div class="highlight hl-ipython3"><pre><span></span><span class="c1"># Load synthetic data</span>
7550
  <span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">read_csv</span><span class="p">(</span><span class="s1">'../data/synthetic_oncology_patients.csv'</span><span class="p">)</span>
7551
  <span class="n">df</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
7552
  </pre></div>
 
7554
  </div>
7555
  </div>
7556
  </div>
7557
+ <div class="jp-Cell-outputWrapper">
7558
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7559
+ </div>
7560
+ <div class="jp-OutputArea jp-Cell-outputArea">
7561
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7562
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[2]:</div>
7563
+ <div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
7564
+ <div>
7565
+ <style scoped="">
7566
+ .dataframe tbody tr th:only-of-type {
7567
+ vertical-align: middle;
7568
+ }
7569
+
7570
+ .dataframe tbody tr th {
7571
+ vertical-align: top;
7572
+ }
7573
+
7574
+ .dataframe thead th {
7575
+ text-align: right;
7576
+ }
7577
+ </style>
7578
+ <table border="1" class="dataframe">
7579
+ <thead>
7580
+ <tr style="text-align: right;">
7581
+ <th></th>
7582
+ <th>patient_id</th>
7583
+ <th>age</th>
7584
+ <th>gender</th>
7585
+ <th>cancer_type</th>
7586
+ <th>stage</th>
7587
+ <th>diagnosis_date</th>
7588
+ <th>biomarker_status</th>
7589
+ <th>treatment</th>
7590
+ <th>adverse_event</th>
7591
+ <th>survival_months</th>
7592
+ </tr>
7593
+ </thead>
7594
+ <tbody>
7595
+ <tr>
7596
+ <th>0</th>
7597
+ <td>P001</td>
7598
+ <td>67</td>
7599
+ <td>F</td>
7600
+ <td>Breast</td>
7601
+ <td>II</td>
7602
+ <td>2021-03-15</td>
7603
+ <td>HER2+</td>
7604
+ <td>Trastuzumab</td>
7605
+ <td>NaN</td>
7606
+ <td>28</td>
7607
+ </tr>
7608
+ <tr>
7609
+ <th>1</th>
7610
+ <td>P002</td>
7611
+ <td>59</td>
7612
+ <td>M</td>
7613
+ <td>Lung</td>
7614
+ <td>III</td>
7615
+ <td>2020-11-02</td>
7616
+ <td>EGFR+</td>
7617
+ <td>Osimertinib</td>
7618
+ <td>Rash</td>
7619
+ <td>18</td>
7620
+ </tr>
7621
+ <tr>
7622
+ <th>2</th>
7623
+ <td>P003</td>
7624
+ <td>72</td>
7625
+ <td>F</td>
7626
+ <td>Colorectal</td>
7627
+ <td>IV</td>
7628
+ <td>2019-07-21</td>
7629
+ <td>BRAF-</td>
7630
+ <td>FOLFOX</td>
7631
+ <td>Neuropathy</td>
7632
+ <td>12</td>
7633
+ </tr>
7634
+ <tr>
7635
+ <th>3</th>
7636
+ <td>P004</td>
7637
+ <td>50</td>
7638
+ <td>M</td>
7639
+ <td>Prostate</td>
7640
+ <td>II</td>
7641
+ <td>2022-01-10</td>
7642
+ <td>AR+</td>
7643
+ <td>Abiraterone</td>
7644
+ <td>NaN</td>
7645
+ <td>30</td>
7646
+ </tr>
7647
+ <tr>
7648
+ <th>4</th>
7649
+ <td>P005</td>
7650
+ <td>64</td>
7651
+ <td>F</td>
7652
+ <td>Ovarian</td>
7653
+ <td>III</td>
7654
+ <td>2021-06-18</td>
7655
+ <td>BRCA1+</td>
7656
+ <td>Carboplatin</td>
7657
+ <td>Neutropenia</td>
7658
+ <td>22</td>
7659
+ </tr>
7660
+ </tbody>
7661
+ </table>
7662
+ </div>
7663
+ </div>
7664
+ </div>
7665
+ </div>
7666
+ </div>
7667
  </div>
7668
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=ba0324c5">
7669
  <div class="jp-Cell-inputWrapper" tabindex="0">
 
7675
  </div>
7676
  </div>
7677
  </div>
7678
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell" id="cell-id=525868d7">
7679
  <div class="jp-Cell-inputWrapper" tabindex="0">
7680
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7681
  </div>
7682
  <div class="jp-InputArea jp-Cell-inputArea">
7683
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [3]:</div>
7684
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7685
  <div class="cm-editor cm-s-jupyter">
7686
+ <div class="highlight hl-ipython3"><pre><span></span><span class="c1"># Demographic summary</span>
7687
  <span class="n">df</span><span class="p">[[</span><span class="s1">'age'</span><span class="p">,</span> <span class="s1">'gender'</span><span class="p">,</span> <span class="s1">'cancer_type'</span><span class="p">,</span> <span class="s1">'stage'</span><span class="p">]]</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="s1">'all'</span><span class="p">)</span>
7688
  </pre></div>
7689
  </div>
7690
  </div>
7691
  </div>
7692
  </div>
7693
+ <div class="jp-Cell-outputWrapper">
7694
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7695
+ </div>
7696
+ <div class="jp-OutputArea jp-Cell-outputArea">
7697
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7698
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[3]:</div>
7699
+ <div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
7700
+ <div>
7701
+ <style scoped="">
7702
+ .dataframe tbody tr th:only-of-type {
7703
+ vertical-align: middle;
7704
+ }
7705
+
7706
+ .dataframe tbody tr th {
7707
+ vertical-align: top;
7708
+ }
7709
+
7710
+ .dataframe thead th {
7711
+ text-align: right;
7712
+ }
7713
+ </style>
7714
+ <table border="1" class="dataframe">
7715
+ <thead>
7716
+ <tr style="text-align: right;">
7717
+ <th></th>
7718
+ <th>age</th>
7719
+ <th>gender</th>
7720
+ <th>cancer_type</th>
7721
+ <th>stage</th>
7722
+ </tr>
7723
+ </thead>
7724
+ <tbody>
7725
+ <tr>
7726
+ <th>count</th>
7727
+ <td>10.000000</td>
7728
+ <td>10</td>
7729
+ <td>10</td>
7730
+ <td>10</td>
7731
+ </tr>
7732
+ <tr>
7733
+ <th>unique</th>
7734
+ <td>NaN</td>
7735
+ <td>2</td>
7736
+ <td>5</td>
7737
+ <td>3</td>
7738
+ </tr>
7739
+ <tr>
7740
+ <th>top</th>
7741
+ <td>NaN</td>
7742
+ <td>F</td>
7743
+ <td>Breast</td>
7744
+ <td>II</td>
7745
+ </tr>
7746
+ <tr>
7747
+ <th>freq</th>
7748
+ <td>NaN</td>
7749
+ <td>5</td>
7750
+ <td>2</td>
7751
+ <td>4</td>
7752
+ </tr>
7753
+ <tr>
7754
+ <th>mean</th>
7755
+ <td>62.200000</td>
7756
+ <td>NaN</td>
7757
+ <td>NaN</td>
7758
+ <td>NaN</td>
7759
+ </tr>
7760
+ <tr>
7761
+ <th>std</th>
7762
+ <td>9.186947</td>
7763
+ <td>NaN</td>
7764
+ <td>NaN</td>
7765
+ <td>NaN</td>
7766
+ </tr>
7767
+ <tr>
7768
+ <th>min</th>
7769
+ <td>48.000000</td>
7770
+ <td>NaN</td>
7771
+ <td>NaN</td>
7772
+ <td>NaN</td>
7773
+ </tr>
7774
+ <tr>
7775
+ <th>25%</th>
7776
+ <td>56.000000</td>
7777
+ <td>NaN</td>
7778
+ <td>NaN</td>
7779
+ <td>NaN</td>
7780
+ </tr>
7781
+ <tr>
7782
+ <th>50%</th>
7783
+ <td>63.000000</td>
7784
+ <td>NaN</td>
7785
+ <td>NaN</td>
7786
+ <td>NaN</td>
7787
+ </tr>
7788
+ <tr>
7789
+ <th>75%</th>
7790
+ <td>69.250000</td>
7791
+ <td>NaN</td>
7792
+ <td>NaN</td>
7793
+ <td>NaN</td>
7794
+ </tr>
7795
+ <tr>
7796
+ <th>max</th>
7797
+ <td>75.000000</td>
7798
+ <td>NaN</td>
7799
+ <td>NaN</td>
7800
+ <td>NaN</td>
7801
+ </tr>
7802
+ </tbody>
7803
+ </table>
7804
+ </div>
7805
+ </div>
7806
+ </div>
7807
+ </div>
7808
+ </div>
7809
  </div>
7810
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=f872f204">
7811
  <div class="jp-Cell-inputWrapper" tabindex="0">
 
7817
  </div>
7818
  </div>
7819
  </div>
7820
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell" id="cell-id=8b89480d">
7821
  <div class="jp-Cell-inputWrapper" tabindex="0">
7822
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7823
  </div>
7824
  <div class="jp-InputArea jp-Cell-inputArea">
7825
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [4]:</div>
7826
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7827
  <div class="cm-editor cm-s-jupyter">
7828
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">sns</span><span class="o">.</span><span class="n">countplot</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="n">df</span><span class="p">,</span> <span class="n">x</span><span class="o">=</span><span class="s1">'cancer_type'</span><span class="p">,</span> <span class="n">hue</span><span class="o">=</span><span class="s1">'gender'</span><span class="p">)</span>
7829
  <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s1">'Cancer Type by Gender'</span><span class="p">)</span>
7830
  <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
7831
  </pre></div>
 
7833
  </div>
7834
  </div>
7835
  </div>
7836
+ <div class="jp-Cell-outputWrapper">
7837
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7838
+ </div>
7839
+ <div class="jp-OutputArea jp-Cell-outputArea">
7840
+ <div class="jp-OutputArea-child">
7841
+ <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7842
+ <div class="jp-RenderedImage jp-OutputArea-output" tabindex="0">
7843
+ <img alt="No description has been provided for this image" class="" src=""/>
7844
+ </div>
7845
+ </div>
7846
+ </div>
7847
+ </div>
7848
  </div>
7849
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=d158309e">
7850
  <div class="jp-Cell-inputWrapper" tabindex="0">
 
7856
  </div>
7857
  </div>
7858
  </div>
7859
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell" id="cell-id=a878a045">
7860
  <div class="jp-Cell-inputWrapper" tabindex="0">
7861
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7862
  </div>
7863
  <div class="jp-InputArea jp-Cell-inputArea">
7864
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [5]:</div>
7865
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7866
  <div class="cm-editor cm-s-jupyter">
7867
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">pd</span><span class="o">.</span><span class="n">crosstab</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'biomarker_status'</span><span class="p">],</span> <span class="n">df</span><span class="p">[</span><span class="s1">'treatment'</span><span class="p">])</span>
7868
  </pre></div>
7869
  </div>
7870
  </div>
7871
  </div>
7872
  </div>
7873
+ <div class="jp-Cell-outputWrapper">
7874
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7875
+ </div>
7876
+ <div class="jp-OutputArea jp-Cell-outputArea">
7877
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7878
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[5]:</div>
7879
+ <div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/html" tabindex="0">
7880
+ <div>
7881
+ <style scoped="">
7882
+ .dataframe tbody tr th:only-of-type {
7883
+ vertical-align: middle;
7884
+ }
7885
+
7886
+ .dataframe tbody tr th {
7887
+ vertical-align: top;
7888
+ }
7889
+
7890
+ .dataframe thead th {
7891
+ text-align: right;
7892
+ }
7893
+ </style>
7894
+ <table border="1" class="dataframe">
7895
+ <thead>
7896
+ <tr style="text-align: right;">
7897
+ <th>treatment</th>
7898
+ <th>Abiraterone</th>
7899
+ <th>Alectinib</th>
7900
+ <th>Carboplatin</th>
7901
+ <th>Docetaxel</th>
7902
+ <th>FOLFIRI</th>
7903
+ <th>FOLFOX</th>
7904
+ <th>Osimertinib</th>
7905
+ <th>Paclitaxel</th>
7906
+ <th>Trastuzumab</th>
7907
+ </tr>
7908
+ <tr>
7909
+ <th>biomarker_status</th>
7910
+ <th></th>
7911
+ <th></th>
7912
+ <th></th>
7913
+ <th></th>
7914
+ <th></th>
7915
+ <th></th>
7916
+ <th></th>
7917
+ <th></th>
7918
+ <th></th>
7919
+ </tr>
7920
+ </thead>
7921
+ <tbody>
7922
+ <tr>
7923
+ <th>ALK+</th>
7924
+ <td>0</td>
7925
+ <td>1</td>
7926
+ <td>0</td>
7927
+ <td>0</td>
7928
+ <td>0</td>
7929
+ <td>0</td>
7930
+ <td>0</td>
7931
+ <td>0</td>
7932
+ <td>0</td>
7933
+ </tr>
7934
+ <tr>
7935
+ <th>AR+</th>
7936
+ <td>1</td>
7937
+ <td>0</td>
7938
+ <td>0</td>
7939
+ <td>0</td>
7940
+ <td>0</td>
7941
+ <td>0</td>
7942
+ <td>0</td>
7943
+ <td>0</td>
7944
+ <td>0</td>
7945
+ </tr>
7946
+ <tr>
7947
+ <th>AR-</th>
7948
+ <td>0</td>
7949
+ <td>0</td>
7950
+ <td>0</td>
7951
+ <td>1</td>
7952
+ <td>0</td>
7953
+ <td>0</td>
7954
+ <td>0</td>
7955
+ <td>0</td>
7956
+ <td>0</td>
7957
+ </tr>
7958
+ <tr>
7959
+ <th>BRAF+</th>
7960
+ <td>0</td>
7961
+ <td>0</td>
7962
+ <td>0</td>
7963
+ <td>0</td>
7964
+ <td>1</td>
7965
+ <td>0</td>
7966
+ <td>0</td>
7967
+ <td>0</td>
7968
+ <td>0</td>
7969
+ </tr>
7970
+ <tr>
7971
+ <th>BRAF-</th>
7972
+ <td>0</td>
7973
+ <td>0</td>
7974
+ <td>0</td>
7975
+ <td>0</td>
7976
+ <td>0</td>
7977
+ <td>1</td>
7978
+ <td>0</td>
7979
+ <td>0</td>
7980
+ <td>0</td>
7981
+ </tr>
7982
+ <tr>
7983
+ <th>BRCA1+</th>
7984
+ <td>0</td>
7985
+ <td>0</td>
7986
+ <td>1</td>
7987
+ <td>0</td>
7988
+ <td>0</td>
7989
+ <td>0</td>
7990
+ <td>0</td>
7991
+ <td>0</td>
7992
+ <td>0</td>
7993
+ </tr>
7994
+ <tr>
7995
+ <th>BRCA2-</th>
7996
+ <td>0</td>
7997
+ <td>0</td>
7998
+ <td>0</td>
7999
+ <td>0</td>
8000
+ <td>0</td>
8001
+ <td>0</td>
8002
+ <td>0</td>
8003
+ <td>1</td>
8004
+ <td>0</td>
8005
+ </tr>
8006
+ <tr>
8007
+ <th>EGFR+</th>
8008
+ <td>0</td>
8009
+ <td>0</td>
8010
+ <td>0</td>
8011
+ <td>0</td>
8012
+ <td>0</td>
8013
+ <td>0</td>
8014
+ <td>1</td>
8015
+ <td>0</td>
8016
+ <td>0</td>
8017
+ </tr>
8018
+ <tr>
8019
+ <th>HER2+</th>
8020
+ <td>0</td>
8021
+ <td>0</td>
8022
+ <td>0</td>
8023
+ <td>0</td>
8024
+ <td>0</td>
8025
+ <td>0</td>
8026
+ <td>0</td>
8027
+ <td>0</td>
8028
+ <td>1</td>
8029
+ </tr>
8030
+ <tr>
8031
+ <th>HER2-</th>
8032
+ <td>0</td>
8033
+ <td>0</td>
8034
+ <td>0</td>
8035
+ <td>0</td>
8036
+ <td>0</td>
8037
+ <td>0</td>
8038
+ <td>0</td>
8039
+ <td>1</td>
8040
+ <td>0</td>
8041
+ </tr>
8042
+ </tbody>
8043
+ </table>
8044
+ </div>
8045
+ </div>
8046
+ </div>
8047
+ </div>
8048
+ </div>
8049
  </div>
8050
  <div class="jp-Cell jp-MarkdownCell jp-Notebook-cell" id="cell-id=50dd531c">
8051
  <div class="jp-Cell-inputWrapper" tabindex="0">
 
8057
  </div>
8058
  </div>
8059
  </div>
8060
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell" id="cell-id=8a9cf12f">
8061
  <div class="jp-Cell-inputWrapper" tabindex="0">
8062
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
8063
  </div>
8064
  <div class="jp-InputArea jp-Cell-inputArea">
8065
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [6]:</div>
8066
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
8067
  <div class="cm-editor cm-s-jupyter">
8068
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">sns</span><span class="o">.</span><span class="n">histplot</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s1">'survival_months'</span><span class="p">],</span> <span class="n">bins</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> <span class="n">kde</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
8069
  <span class="n">plt</span><span class="o">.</span><span class="n">xlabel</span><span class="p">(</span><span class="s1">'Survival (months)'</span><span class="p">)</span>
8070
  <span class="n">plt</span><span class="o">.</span><span class="n">title</span><span class="p">(</span><span class="s1">'Distribution of Survival Time'</span><span class="p">)</span>
8071
  <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 
8074
  </div>
8075
  </div>
8076
  </div>
8077
+ <div class="jp-Cell-outputWrapper">
8078
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
8079
+ </div>
8080
+ <div class="jp-OutputArea jp-Cell-outputArea">
8081
+ <div class="jp-OutputArea-child">
8082
+ <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
8083
+ <div class="jp-RenderedImage jp-OutputArea-output" tabindex="0">
8084
+ <img alt="No description has been provided for this image" class="" src=""/>
8085
+ </div>
8086
+ </div>
8087
+ </div>
8088
+ </div>
8089
  </div>
8090
  </main>
8091
  </body>
notebooks/01_data_exploration.ipynb CHANGED
@@ -11,7 +11,7 @@
11
  },
12
  {
13
  "cell_type": "code",
14
- "execution_count": null,
15
  "id": "a51fb3f6",
16
  "metadata": {},
17
  "outputs": [],
@@ -24,10 +24,134 @@
24
  },
25
  {
26
  "cell_type": "code",
27
- "execution_count": null,
28
  "id": "48479ace",
29
  "metadata": {},
30
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "source": [
32
  "# Load synthetic data\n",
33
  "df = pd.read_csv('../data/synthetic_oncology_patients.csv')\n",
@@ -44,10 +168,139 @@
44
  },
45
  {
46
  "cell_type": "code",
47
- "execution_count": null,
48
  "id": "525868d7",
49
  "metadata": {},
50
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  "source": [
52
  "# Demographic summary\n",
53
  "df[['age', 'gender', 'cancer_type', 'stage']].describe(include='all')"
@@ -63,10 +316,21 @@
63
  },
64
  {
65
  "cell_type": "code",
66
- "execution_count": null,
67
  "id": "8b89480d",
68
  "metadata": {},
69
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
70
  "source": [
71
  "sns.countplot(data=df, x='cancer_type', hue='gender')\n",
72
  "plt.title('Cancer Type by Gender')\n",
@@ -83,10 +347,212 @@
83
  },
84
  {
85
  "cell_type": "code",
86
- "execution_count": null,
87
  "id": "a878a045",
88
  "metadata": {},
89
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  "source": [
91
  "pd.crosstab(df['biomarker_status'], df['treatment'])"
92
  ]
@@ -101,10 +567,21 @@
101
  },
102
  {
103
  "cell_type": "code",
104
- "execution_count": null,
105
  "id": "8a9cf12f",
106
  "metadata": {},
107
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
108
  "source": [
109
  "sns.histplot(df['survival_months'], bins=8, kde=True)\n",
110
  "plt.xlabel('Survival (months)')\n",
@@ -114,8 +591,22 @@
114
  }
115
  ],
116
  "metadata": {
 
 
 
 
 
117
  "language_info": {
118
- "name": "python"
 
 
 
 
 
 
 
 
 
119
  }
120
  },
121
  "nbformat": 4,
 
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": 1,
15
  "id": "a51fb3f6",
16
  "metadata": {},
17
  "outputs": [],
 
24
  },
25
  {
26
  "cell_type": "code",
27
+ "execution_count": 2,
28
  "id": "48479ace",
29
  "metadata": {},
30
+ "outputs": [
31
+ {
32
+ "data": {
33
+ "text/html": [
34
+ "<div>\n",
35
+ "<style scoped>\n",
36
+ " .dataframe tbody tr th:only-of-type {\n",
37
+ " vertical-align: middle;\n",
38
+ " }\n",
39
+ "\n",
40
+ " .dataframe tbody tr th {\n",
41
+ " vertical-align: top;\n",
42
+ " }\n",
43
+ "\n",
44
+ " .dataframe thead th {\n",
45
+ " text-align: right;\n",
46
+ " }\n",
47
+ "</style>\n",
48
+ "<table border=\"1\" class=\"dataframe\">\n",
49
+ " <thead>\n",
50
+ " <tr style=\"text-align: right;\">\n",
51
+ " <th></th>\n",
52
+ " <th>patient_id</th>\n",
53
+ " <th>age</th>\n",
54
+ " <th>gender</th>\n",
55
+ " <th>cancer_type</th>\n",
56
+ " <th>stage</th>\n",
57
+ " <th>diagnosis_date</th>\n",
58
+ " <th>biomarker_status</th>\n",
59
+ " <th>treatment</th>\n",
60
+ " <th>adverse_event</th>\n",
61
+ " <th>survival_months</th>\n",
62
+ " </tr>\n",
63
+ " </thead>\n",
64
+ " <tbody>\n",
65
+ " <tr>\n",
66
+ " <th>0</th>\n",
67
+ " <td>P001</td>\n",
68
+ " <td>67</td>\n",
69
+ " <td>F</td>\n",
70
+ " <td>Breast</td>\n",
71
+ " <td>II</td>\n",
72
+ " <td>2021-03-15</td>\n",
73
+ " <td>HER2+</td>\n",
74
+ " <td>Trastuzumab</td>\n",
75
+ " <td>NaN</td>\n",
76
+ " <td>28</td>\n",
77
+ " </tr>\n",
78
+ " <tr>\n",
79
+ " <th>1</th>\n",
80
+ " <td>P002</td>\n",
81
+ " <td>59</td>\n",
82
+ " <td>M</td>\n",
83
+ " <td>Lung</td>\n",
84
+ " <td>III</td>\n",
85
+ " <td>2020-11-02</td>\n",
86
+ " <td>EGFR+</td>\n",
87
+ " <td>Osimertinib</td>\n",
88
+ " <td>Rash</td>\n",
89
+ " <td>18</td>\n",
90
+ " </tr>\n",
91
+ " <tr>\n",
92
+ " <th>2</th>\n",
93
+ " <td>P003</td>\n",
94
+ " <td>72</td>\n",
95
+ " <td>F</td>\n",
96
+ " <td>Colorectal</td>\n",
97
+ " <td>IV</td>\n",
98
+ " <td>2019-07-21</td>\n",
99
+ " <td>BRAF-</td>\n",
100
+ " <td>FOLFOX</td>\n",
101
+ " <td>Neuropathy</td>\n",
102
+ " <td>12</td>\n",
103
+ " </tr>\n",
104
+ " <tr>\n",
105
+ " <th>3</th>\n",
106
+ " <td>P004</td>\n",
107
+ " <td>50</td>\n",
108
+ " <td>M</td>\n",
109
+ " <td>Prostate</td>\n",
110
+ " <td>II</td>\n",
111
+ " <td>2022-01-10</td>\n",
112
+ " <td>AR+</td>\n",
113
+ " <td>Abiraterone</td>\n",
114
+ " <td>NaN</td>\n",
115
+ " <td>30</td>\n",
116
+ " </tr>\n",
117
+ " <tr>\n",
118
+ " <th>4</th>\n",
119
+ " <td>P005</td>\n",
120
+ " <td>64</td>\n",
121
+ " <td>F</td>\n",
122
+ " <td>Ovarian</td>\n",
123
+ " <td>III</td>\n",
124
+ " <td>2021-06-18</td>\n",
125
+ " <td>BRCA1+</td>\n",
126
+ " <td>Carboplatin</td>\n",
127
+ " <td>Neutropenia</td>\n",
128
+ " <td>22</td>\n",
129
+ " </tr>\n",
130
+ " </tbody>\n",
131
+ "</table>\n",
132
+ "</div>"
133
+ ],
134
+ "text/plain": [
135
+ " patient_id age gender cancer_type stage diagnosis_date biomarker_status \\\n",
136
+ "0 P001 67 F Breast II 2021-03-15 HER2+ \n",
137
+ "1 P002 59 M Lung III 2020-11-02 EGFR+ \n",
138
+ "2 P003 72 F Colorectal IV 2019-07-21 BRAF- \n",
139
+ "3 P004 50 M Prostate II 2022-01-10 AR+ \n",
140
+ "4 P005 64 F Ovarian III 2021-06-18 BRCA1+ \n",
141
+ "\n",
142
+ " treatment adverse_event survival_months \n",
143
+ "0 Trastuzumab NaN 28 \n",
144
+ "1 Osimertinib Rash 18 \n",
145
+ "2 FOLFOX Neuropathy 12 \n",
146
+ "3 Abiraterone NaN 30 \n",
147
+ "4 Carboplatin Neutropenia 22 "
148
+ ]
149
+ },
150
+ "execution_count": 2,
151
+ "metadata": {},
152
+ "output_type": "execute_result"
153
+ }
154
+ ],
155
  "source": [
156
  "# Load synthetic data\n",
157
  "df = pd.read_csv('../data/synthetic_oncology_patients.csv')\n",
 
168
  },
169
  {
170
  "cell_type": "code",
171
+ "execution_count": 3,
172
  "id": "525868d7",
173
  "metadata": {},
174
+ "outputs": [
175
+ {
176
+ "data": {
177
+ "text/html": [
178
+ "<div>\n",
179
+ "<style scoped>\n",
180
+ " .dataframe tbody tr th:only-of-type {\n",
181
+ " vertical-align: middle;\n",
182
+ " }\n",
183
+ "\n",
184
+ " .dataframe tbody tr th {\n",
185
+ " vertical-align: top;\n",
186
+ " }\n",
187
+ "\n",
188
+ " .dataframe thead th {\n",
189
+ " text-align: right;\n",
190
+ " }\n",
191
+ "</style>\n",
192
+ "<table border=\"1\" class=\"dataframe\">\n",
193
+ " <thead>\n",
194
+ " <tr style=\"text-align: right;\">\n",
195
+ " <th></th>\n",
196
+ " <th>age</th>\n",
197
+ " <th>gender</th>\n",
198
+ " <th>cancer_type</th>\n",
199
+ " <th>stage</th>\n",
200
+ " </tr>\n",
201
+ " </thead>\n",
202
+ " <tbody>\n",
203
+ " <tr>\n",
204
+ " <th>count</th>\n",
205
+ " <td>10.000000</td>\n",
206
+ " <td>10</td>\n",
207
+ " <td>10</td>\n",
208
+ " <td>10</td>\n",
209
+ " </tr>\n",
210
+ " <tr>\n",
211
+ " <th>unique</th>\n",
212
+ " <td>NaN</td>\n",
213
+ " <td>2</td>\n",
214
+ " <td>5</td>\n",
215
+ " <td>3</td>\n",
216
+ " </tr>\n",
217
+ " <tr>\n",
218
+ " <th>top</th>\n",
219
+ " <td>NaN</td>\n",
220
+ " <td>F</td>\n",
221
+ " <td>Breast</td>\n",
222
+ " <td>II</td>\n",
223
+ " </tr>\n",
224
+ " <tr>\n",
225
+ " <th>freq</th>\n",
226
+ " <td>NaN</td>\n",
227
+ " <td>5</td>\n",
228
+ " <td>2</td>\n",
229
+ " <td>4</td>\n",
230
+ " </tr>\n",
231
+ " <tr>\n",
232
+ " <th>mean</th>\n",
233
+ " <td>62.200000</td>\n",
234
+ " <td>NaN</td>\n",
235
+ " <td>NaN</td>\n",
236
+ " <td>NaN</td>\n",
237
+ " </tr>\n",
238
+ " <tr>\n",
239
+ " <th>std</th>\n",
240
+ " <td>9.186947</td>\n",
241
+ " <td>NaN</td>\n",
242
+ " <td>NaN</td>\n",
243
+ " <td>NaN</td>\n",
244
+ " </tr>\n",
245
+ " <tr>\n",
246
+ " <th>min</th>\n",
247
+ " <td>48.000000</td>\n",
248
+ " <td>NaN</td>\n",
249
+ " <td>NaN</td>\n",
250
+ " <td>NaN</td>\n",
251
+ " </tr>\n",
252
+ " <tr>\n",
253
+ " <th>25%</th>\n",
254
+ " <td>56.000000</td>\n",
255
+ " <td>NaN</td>\n",
256
+ " <td>NaN</td>\n",
257
+ " <td>NaN</td>\n",
258
+ " </tr>\n",
259
+ " <tr>\n",
260
+ " <th>50%</th>\n",
261
+ " <td>63.000000</td>\n",
262
+ " <td>NaN</td>\n",
263
+ " <td>NaN</td>\n",
264
+ " <td>NaN</td>\n",
265
+ " </tr>\n",
266
+ " <tr>\n",
267
+ " <th>75%</th>\n",
268
+ " <td>69.250000</td>\n",
269
+ " <td>NaN</td>\n",
270
+ " <td>NaN</td>\n",
271
+ " <td>NaN</td>\n",
272
+ " </tr>\n",
273
+ " <tr>\n",
274
+ " <th>max</th>\n",
275
+ " <td>75.000000</td>\n",
276
+ " <td>NaN</td>\n",
277
+ " <td>NaN</td>\n",
278
+ " <td>NaN</td>\n",
279
+ " </tr>\n",
280
+ " </tbody>\n",
281
+ "</table>\n",
282
+ "</div>"
283
+ ],
284
+ "text/plain": [
285
+ " age gender cancer_type stage\n",
286
+ "count 10.000000 10 10 10\n",
287
+ "unique NaN 2 5 3\n",
288
+ "top NaN F Breast II\n",
289
+ "freq NaN 5 2 4\n",
290
+ "mean 62.200000 NaN NaN NaN\n",
291
+ "std 9.186947 NaN NaN NaN\n",
292
+ "min 48.000000 NaN NaN NaN\n",
293
+ "25% 56.000000 NaN NaN NaN\n",
294
+ "50% 63.000000 NaN NaN NaN\n",
295
+ "75% 69.250000 NaN NaN NaN\n",
296
+ "max 75.000000 NaN NaN NaN"
297
+ ]
298
+ },
299
+ "execution_count": 3,
300
+ "metadata": {},
301
+ "output_type": "execute_result"
302
+ }
303
+ ],
304
  "source": [
305
  "# Demographic summary\n",
306
  "df[['age', 'gender', 'cancer_type', 'stage']].describe(include='all')"
 
316
  },
317
  {
318
  "cell_type": "code",
319
+ "execution_count": 4,
320
  "id": "8b89480d",
321
  "metadata": {},
322
+ "outputs": [
323
+ {
324
+ "data": {
325
+ "image/png": "",
326
+ "text/plain": [
327
+ "<Figure size 640x480 with 1 Axes>"
328
+ ]
329
+ },
330
+ "metadata": {},
331
+ "output_type": "display_data"
332
+ }
333
+ ],
334
  "source": [
335
  "sns.countplot(data=df, x='cancer_type', hue='gender')\n",
336
  "plt.title('Cancer Type by Gender')\n",
 
347
  },
348
  {
349
  "cell_type": "code",
350
+ "execution_count": 5,
351
  "id": "a878a045",
352
  "metadata": {},
353
+ "outputs": [
354
+ {
355
+ "data": {
356
+ "text/html": [
357
+ "<div>\n",
358
+ "<style scoped>\n",
359
+ " .dataframe tbody tr th:only-of-type {\n",
360
+ " vertical-align: middle;\n",
361
+ " }\n",
362
+ "\n",
363
+ " .dataframe tbody tr th {\n",
364
+ " vertical-align: top;\n",
365
+ " }\n",
366
+ "\n",
367
+ " .dataframe thead th {\n",
368
+ " text-align: right;\n",
369
+ " }\n",
370
+ "</style>\n",
371
+ "<table border=\"1\" class=\"dataframe\">\n",
372
+ " <thead>\n",
373
+ " <tr style=\"text-align: right;\">\n",
374
+ " <th>treatment</th>\n",
375
+ " <th>Abiraterone</th>\n",
376
+ " <th>Alectinib</th>\n",
377
+ " <th>Carboplatin</th>\n",
378
+ " <th>Docetaxel</th>\n",
379
+ " <th>FOLFIRI</th>\n",
380
+ " <th>FOLFOX</th>\n",
381
+ " <th>Osimertinib</th>\n",
382
+ " <th>Paclitaxel</th>\n",
383
+ " <th>Trastuzumab</th>\n",
384
+ " </tr>\n",
385
+ " <tr>\n",
386
+ " <th>biomarker_status</th>\n",
387
+ " <th></th>\n",
388
+ " <th></th>\n",
389
+ " <th></th>\n",
390
+ " <th></th>\n",
391
+ " <th></th>\n",
392
+ " <th></th>\n",
393
+ " <th></th>\n",
394
+ " <th></th>\n",
395
+ " <th></th>\n",
396
+ " </tr>\n",
397
+ " </thead>\n",
398
+ " <tbody>\n",
399
+ " <tr>\n",
400
+ " <th>ALK+</th>\n",
401
+ " <td>0</td>\n",
402
+ " <td>1</td>\n",
403
+ " <td>0</td>\n",
404
+ " <td>0</td>\n",
405
+ " <td>0</td>\n",
406
+ " <td>0</td>\n",
407
+ " <td>0</td>\n",
408
+ " <td>0</td>\n",
409
+ " <td>0</td>\n",
410
+ " </tr>\n",
411
+ " <tr>\n",
412
+ " <th>AR+</th>\n",
413
+ " <td>1</td>\n",
414
+ " <td>0</td>\n",
415
+ " <td>0</td>\n",
416
+ " <td>0</td>\n",
417
+ " <td>0</td>\n",
418
+ " <td>0</td>\n",
419
+ " <td>0</td>\n",
420
+ " <td>0</td>\n",
421
+ " <td>0</td>\n",
422
+ " </tr>\n",
423
+ " <tr>\n",
424
+ " <th>AR-</th>\n",
425
+ " <td>0</td>\n",
426
+ " <td>0</td>\n",
427
+ " <td>0</td>\n",
428
+ " <td>1</td>\n",
429
+ " <td>0</td>\n",
430
+ " <td>0</td>\n",
431
+ " <td>0</td>\n",
432
+ " <td>0</td>\n",
433
+ " <td>0</td>\n",
434
+ " </tr>\n",
435
+ " <tr>\n",
436
+ " <th>BRAF+</th>\n",
437
+ " <td>0</td>\n",
438
+ " <td>0</td>\n",
439
+ " <td>0</td>\n",
440
+ " <td>0</td>\n",
441
+ " <td>1</td>\n",
442
+ " <td>0</td>\n",
443
+ " <td>0</td>\n",
444
+ " <td>0</td>\n",
445
+ " <td>0</td>\n",
446
+ " </tr>\n",
447
+ " <tr>\n",
448
+ " <th>BRAF-</th>\n",
449
+ " <td>0</td>\n",
450
+ " <td>0</td>\n",
451
+ " <td>0</td>\n",
452
+ " <td>0</td>\n",
453
+ " <td>0</td>\n",
454
+ " <td>1</td>\n",
455
+ " <td>0</td>\n",
456
+ " <td>0</td>\n",
457
+ " <td>0</td>\n",
458
+ " </tr>\n",
459
+ " <tr>\n",
460
+ " <th>BRCA1+</th>\n",
461
+ " <td>0</td>\n",
462
+ " <td>0</td>\n",
463
+ " <td>1</td>\n",
464
+ " <td>0</td>\n",
465
+ " <td>0</td>\n",
466
+ " <td>0</td>\n",
467
+ " <td>0</td>\n",
468
+ " <td>0</td>\n",
469
+ " <td>0</td>\n",
470
+ " </tr>\n",
471
+ " <tr>\n",
472
+ " <th>BRCA2-</th>\n",
473
+ " <td>0</td>\n",
474
+ " <td>0</td>\n",
475
+ " <td>0</td>\n",
476
+ " <td>0</td>\n",
477
+ " <td>0</td>\n",
478
+ " <td>0</td>\n",
479
+ " <td>0</td>\n",
480
+ " <td>1</td>\n",
481
+ " <td>0</td>\n",
482
+ " </tr>\n",
483
+ " <tr>\n",
484
+ " <th>EGFR+</th>\n",
485
+ " <td>0</td>\n",
486
+ " <td>0</td>\n",
487
+ " <td>0</td>\n",
488
+ " <td>0</td>\n",
489
+ " <td>0</td>\n",
490
+ " <td>0</td>\n",
491
+ " <td>1</td>\n",
492
+ " <td>0</td>\n",
493
+ " <td>0</td>\n",
494
+ " </tr>\n",
495
+ " <tr>\n",
496
+ " <th>HER2+</th>\n",
497
+ " <td>0</td>\n",
498
+ " <td>0</td>\n",
499
+ " <td>0</td>\n",
500
+ " <td>0</td>\n",
501
+ " <td>0</td>\n",
502
+ " <td>0</td>\n",
503
+ " <td>0</td>\n",
504
+ " <td>0</td>\n",
505
+ " <td>1</td>\n",
506
+ " </tr>\n",
507
+ " <tr>\n",
508
+ " <th>HER2-</th>\n",
509
+ " <td>0</td>\n",
510
+ " <td>0</td>\n",
511
+ " <td>0</td>\n",
512
+ " <td>0</td>\n",
513
+ " <td>0</td>\n",
514
+ " <td>0</td>\n",
515
+ " <td>0</td>\n",
516
+ " <td>1</td>\n",
517
+ " <td>0</td>\n",
518
+ " </tr>\n",
519
+ " </tbody>\n",
520
+ "</table>\n",
521
+ "</div>"
522
+ ],
523
+ "text/plain": [
524
+ "treatment Abiraterone Alectinib Carboplatin Docetaxel FOLFIRI \\\n",
525
+ "biomarker_status \n",
526
+ "ALK+ 0 1 0 0 0 \n",
527
+ "AR+ 1 0 0 0 0 \n",
528
+ "AR- 0 0 0 1 0 \n",
529
+ "BRAF+ 0 0 0 0 1 \n",
530
+ "BRAF- 0 0 0 0 0 \n",
531
+ "BRCA1+ 0 0 1 0 0 \n",
532
+ "BRCA2- 0 0 0 0 0 \n",
533
+ "EGFR+ 0 0 0 0 0 \n",
534
+ "HER2+ 0 0 0 0 0 \n",
535
+ "HER2- 0 0 0 0 0 \n",
536
+ "\n",
537
+ "treatment FOLFOX Osimertinib Paclitaxel Trastuzumab \n",
538
+ "biomarker_status \n",
539
+ "ALK+ 0 0 0 0 \n",
540
+ "AR+ 0 0 0 0 \n",
541
+ "AR- 0 0 0 0 \n",
542
+ "BRAF+ 0 0 0 0 \n",
543
+ "BRAF- 1 0 0 0 \n",
544
+ "BRCA1+ 0 0 0 0 \n",
545
+ "BRCA2- 0 0 1 0 \n",
546
+ "EGFR+ 0 1 0 0 \n",
547
+ "HER2+ 0 0 0 1 \n",
548
+ "HER2- 0 0 1 0 "
549
+ ]
550
+ },
551
+ "execution_count": 5,
552
+ "metadata": {},
553
+ "output_type": "execute_result"
554
+ }
555
+ ],
556
  "source": [
557
  "pd.crosstab(df['biomarker_status'], df['treatment'])"
558
  ]
 
567
  },
568
  {
569
  "cell_type": "code",
570
+ "execution_count": 6,
571
  "id": "8a9cf12f",
572
  "metadata": {},
573
+ "outputs": [
574
+ {
575
+ "data": {
576
+ "image/png": "",
577
+ "text/plain": [
578
+ "<Figure size 640x480 with 1 Axes>"
579
+ ]
580
+ },
581
+ "metadata": {},
582
+ "output_type": "display_data"
583
+ }
584
+ ],
585
  "source": [
586
  "sns.histplot(df['survival_months'], bins=8, kde=True)\n",
587
  "plt.xlabel('Survival (months)')\n",
 
591
  }
592
  ],
593
  "metadata": {
594
+ "kernelspec": {
595
+ "display_name": ".venv",
596
+ "language": "python",
597
+ "name": "python3"
598
+ },
599
  "language_info": {
600
+ "codemirror_mode": {
601
+ "name": "ipython",
602
+ "version": 3
603
+ },
604
+ "file_extension": ".py",
605
+ "mimetype": "text/x-python",
606
+ "name": "python",
607
+ "nbconvert_exporter": "python",
608
+ "pygments_lexer": "ipython3",
609
+ "version": "3.13.7"
610
  }
611
  },
612
  "nbformat": 4,