Qarlsberg committed on
Commit
0597022
1 Parent(s): 15655d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -14
app.py CHANGED
@@ -63,6 +63,12 @@ with st.sidebar:
63
  n_neighbors = st.slider('Number of neighbors', min_value=5, max_value=100, value=15, step=5,help="Number of Neighbors (n_neighbors): This parameter controls how UMAP balances local versus global structure in the data. It determines the number of neighboring points used in the local manifold approximations. A higher value considers more neighbors for a broader view of the data structure, while a lower value focuses on the local neighborhood, emphasizing finer details.")
64
  min_dist = st.slider('Minimum distance', min_value=0.0, max_value=0.99, value=0.1, step=0.01,help="Minimum Distance (min_dist): This parameter controls how tightly UMAP is allowed to pack points together. It determines the minimum distance between points in the low-dimensional representation. A lower value allows UMAP to pack points more tightly, while a higher value spreads points out more evenly.")
65
 
 
 
 
 
 
 
66
  submitted_umap = st.form_submit_button("Start Umap", type="primary")
67
  if not submitted_umap:
68
  st.error("Press start Umap to start")
@@ -74,20 +80,6 @@ data_2d = reduce_dimensions(data, 2, n_neighbors, min_dist)
74
  fig_2d = px.scatter(x=data_2d[:, 0], y=data_2d[:, 1], color=labels, title="2D UMAP", color_continuous_scale=px.colors.qualitative.Set1)
75
  st.plotly_chart(fig_2d)
76
 
77
-
78
- with st.sidebar:
79
- with st.form("my_form_hdbscan", border=False):
80
-
81
- # HDBSCAN parameters adjustable via sliders
82
- st.header('HDBSCAN Parameters')
83
- min_cluster_size = st.slider('Minimum cluster size', min_value=5, max_value=200, value=30, step=5,help="Minimum Cluster Size (min_cluster_size): This parameter sets the minimum size of clusters. It determines the smallest number of points that can form a cluster. A higher value will result in fewer clusters, while a lower value will result in more clusters.")
84
- min_samples = st.slider('Minimum samples', min_value=1, max_value=20, value=5, step=1,help="Minimum Samples (min_samples): This parameter sets the number of samples in a neighborhood for a point to be considered as a core point. It determines the minimum number of points required to form a cluster. A higher value will result in fewer points being considered as core points, while a lower value will result in more points being considered as core points.")
85
- submitted_hdbscan = st.form_submit_button("Start HDBScan", type="primary")
86
-
87
- if not submitted_hdbscan:
88
- st.error("Press start HDBScan to star model")
89
- st.stop()
90
-
91
  # HDBSCAN Clustering
92
  st.subheader('HDBSCAN Clustering', help="HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise) is a density-based clustering algorithm. It is based on the idea that clusters are dense groups of points separated by regions of lower density. The algorithm finds clusters by looking for areas of the data that have a high density of points, separated by areas of low density. It is particularly useful for finding clusters of varying density in large spatial data. HDBSCAN is a powerful tool for clustering and visualizing high-dimensional data.")
93
  clusters = perform_clustering(data_2d, min_samples, min_cluster_size)
 
63
  n_neighbors = st.slider('Number of neighbors', min_value=5, max_value=100, value=15, step=5,help="Number of Neighbors (n_neighbors): This parameter controls how UMAP balances local versus global structure in the data. It determines the number of neighboring points used in the local manifold approximations. A higher value considers more neighbors for a broader view of the data structure, while a lower value focuses on the local neighborhood, emphasizing finer details.")
64
  min_dist = st.slider('Minimum distance', min_value=0.0, max_value=0.99, value=0.1, step=0.01,help="Minimum Distance (min_dist): This parameter controls how tightly UMAP is allowed to pack points together. It determines the minimum distance between points in the low-dimensional representation. A lower value allows UMAP to pack points more tightly, while a higher value spreads points out more evenly.")
65
 
66
+ # HDBSCAN parameters adjustable via sliders
67
+ st.header('HDBSCAN Parameters')
68
+ min_cluster_size = st.slider('Minimum cluster size', min_value=5, max_value=200, value=30, step=5,help="Minimum Cluster Size (min_cluster_size): This parameter sets the minimum size of clusters. It determines the smallest number of points that can form a cluster. A higher value will result in fewer clusters, while a lower value will result in more clusters.")
69
+ min_samples = st.slider('Minimum samples', min_value=1, max_value=20, value=5, step=1,help="Minimum Samples (min_samples): This parameter sets the number of samples in a neighborhood for a point to be considered as a core point. It determines the minimum number of points required to form a cluster. A higher value will result in fewer points being considered as core points, while a lower value will result in more points being considered as core points.")
70
+ submitted_hdbscan = st.form_submit_button("Start HDBScan", type="primary")
71
+
72
  submitted_umap = st.form_submit_button("Start Umap", type="primary")
73
  if not submitted_umap:
74
  st.error("Press start Umap to start")
 
80
  fig_2d = px.scatter(x=data_2d[:, 0], y=data_2d[:, 1], color=labels, title="2D UMAP", color_continuous_scale=px.colors.qualitative.Set1)
81
  st.plotly_chart(fig_2d)
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  # HDBSCAN Clustering
84
  st.subheader('HDBSCAN Clustering', help="HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise) is a density-based clustering algorithm. It is based on the idea that clusters are dense groups of points separated by regions of lower density. The algorithm finds clusters by looking for areas of the data that have a high density of points, separated by areas of low density. It is particularly useful for finding clusters of varying density in large spatial data. HDBSCAN is a powerful tool for clustering and visualizing high-dimensional data.")
85
  clusters = perform_clustering(data_2d, min_samples, min_cluster_size)