Spaces:

thov
/

correlationGraph

Sleeping

thov committed on Nov 28, 2023

Commit

775aa8b

•

1 Parent(s): 843c970

Update utils.py

Files changed (1) hide show

utils.py CHANGED Viewed

@@ -8,12 +8,16 @@ import networkx as nx
 def clean_csv_file(csv_file):
     df = pd.read_csv(csv_file)
     return df
-def build_graph(csv_file, threshold):
     features = clean_csv_file(csv_file)
-    links = features.corr(method='kendall').fillna(0).stack().reset_index()
     links.columns = ['var1', 'var2', 'value']
     # Keep only correlation over a threshold
@@ -46,9 +50,9 @@ class MplColorHelper:
         return f"rgb({r},{g},{b})"
-def display_graph(csv_file, threshold):
-    G = build_graph(csv_file, threshold=threshold)
     CM_NAME = "Wistia"
@@ -59,6 +63,7 @@ def display_graph(csv_file, threshold):
     # get rgb string for each node
     for u, v, data in G.edges(data=True):
         data['color'] = edge_colors.get_rgb_str(data['corr_value'])
     disp = gv.d3(
             G,
             # graph specs

 def clean_csv_file(csv_file):
     df = pd.read_csv(csv_file)
+    df.dropna(inplace=True)
+    df.drop_duplicates(inplace=True)
     return df
+def build_graph(csv_file, *, threshold, corr_type):
     features = clean_csv_file(csv_file)
+    links = features.corr(method=corr_type).fillna(0).stack().reset_index()
     links.columns = ['var1', 'var2', 'value']
     # Keep only correlation over a threshold
         return f"rgb({r},{g},{b})"
+def display_graph(csv_file, *, threshold, corr_type):
+    G = build_graph(csv_file, threshold=threshold, corr_type=corr_type)
     CM_NAME = "Wistia"
     # get rgb string for each node
     for u, v, data in G.edges(data=True):
         data['color'] = edge_colors.get_rgb_str(data['corr_value'])
     disp = gv.d3(
             G,
             # graph specs